v2.0.1: Monthly curation now in curator_prompt.md, remove full_run_time/full_run_day config
This commit is contained in:
@@ -48,8 +48,7 @@ class Config:
|
||||
semantic_search_turns: int = 2
|
||||
semantic_score_threshold: float = 0.6 # Score threshold for semantic search
|
||||
run_time: str = "02:00" # Daily curator time
|
||||
full_run_time: str = "03:00" # Monthly full curator time
|
||||
full_run_day: int = 1 # Day of month for full run (1st)
|
||||
# Monthly mode has no config setting: Curator.run() switches to it automatically on day 01 of the month (UTC)
|
||||
curator_model: str = "gpt-oss:120b"
|
||||
debug: bool = False
|
||||
cloud: CloudConfig = field(default_factory=CloudConfig)
|
||||
@@ -103,8 +102,6 @@ class Config:
|
||||
|
||||
if "curator" in data:
|
||||
config.run_time = data["curator"].get("run_time", config.run_time)
|
||||
config.full_run_time = data["curator"].get("full_run_time", config.full_run_time)
|
||||
config.full_run_day = data["curator"].get("full_run_day", config.full_run_day)
|
||||
config.curator_model = data["curator"].get("curator_model", config.curator_model)
|
||||
|
||||
if "cloud" in data:
|
||||
@@ -118,4 +115,4 @@ class Config:
|
||||
|
||||
return config
|
||||
|
||||
config = Config.load()
|
||||
config = Config.load()
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""Memory curator - runs daily (recent 24h) and monthly (full DB) to clean and maintain memory database.
|
||||
"""Memory curator - runs daily to clean and maintain memory database.
|
||||
|
||||
Creates INDIVIDUAL cleaned turns (one per raw turn), not merged summaries.
|
||||
Parses JSON response from curator_prompt.md format.
|
||||
On day 01 of each month, processes ALL raw memories (monthly mode).
|
||||
Otherwise, processes recent 24h of raw memories (daily mode).
|
||||
The prompt determines behavior based on current date.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
@@ -23,7 +24,6 @@ STATIC_DIR = Path(os.environ.get("VERA_STATIC_DIR", "/app/static"))
|
||||
|
||||
def load_curator_prompt() -> str:
|
||||
"""Load curator prompt from prompts directory."""
|
||||
# Try prompts directory first, then static for backward compatibility
|
||||
prompts_path = PROMPTS_DIR / "curator_prompt.md"
|
||||
static_path = STATIC_DIR / "curator_prompt.md"
|
||||
|
||||
@@ -42,16 +42,20 @@ class Curator:
|
||||
self.ollama_host = ollama_host
|
||||
self.curator_prompt = load_curator_prompt()
|
||||
|
||||
async def run(self, full: bool = False):
|
||||
async def run(self):
|
||||
"""Run the curation process.
|
||||
|
||||
Args:
|
||||
full: If True, process ALL raw memories (monthly full run).
|
||||
If False, process only recent 24h (daily run).
|
||||
Automatically detects day 01 for monthly mode (processes ALL raw memories).
|
||||
Otherwise runs daily mode (processes recent 24h only).
|
||||
The prompt determines behavior based on current date.
|
||||
"""
|
||||
logger.info(f"Starting memory curation (full={full})...")
|
||||
current_date = datetime.utcnow()
|
||||
is_monthly = current_date.day == 1
|
||||
mode = "MONTHLY" if is_monthly else "DAILY"
|
||||
|
||||
logger.info(f"Starting memory curation ({mode} mode)...")
|
||||
try:
|
||||
current_date = datetime.utcnow().strftime("%Y-%m-%d")
|
||||
current_date_str = current_date.strftime("%Y-%m-%d")
|
||||
|
||||
# Get all memories (async)
|
||||
points, _ = await self.qdrant.client.scroll(
|
||||
@@ -77,15 +81,15 @@ class Curator:
|
||||
|
||||
logger.info(f"Found {len(raw_memories)} raw, {len(curated_memories)} curated")
|
||||
|
||||
# Filter by time for daily runs, process all for full runs
|
||||
if full:
|
||||
# Filter by time for daily mode, process all for monthly mode
|
||||
if is_monthly:
|
||||
# Monthly full run: process ALL raw memories
|
||||
recent_raw = raw_memories
|
||||
logger.info(f"FULL RUN: Processing all {len(recent_raw)} raw memories")
|
||||
logger.info(f"MONTHLY MODE: Processing all {len(recent_raw)} raw memories")
|
||||
else:
|
||||
# Daily run: process only recent 24h
|
||||
recent_raw = [m for m in raw_memories if self._is_recent(m, hours=24)]
|
||||
logger.info(f"DAILY RUN: Processing {len(recent_raw)} recent raw memories")
|
||||
logger.info(f"DAILY MODE: Processing {len(recent_raw)} recent raw memories")
|
||||
|
||||
existing_sample = curated_memories[-50:] if len(curated_memories) > 50 else curated_memories
|
||||
|
||||
@@ -96,10 +100,10 @@ class Curator:
|
||||
raw_turns_text = self._format_raw_turns(recent_raw)
|
||||
existing_text = self._format_existing_memories(existing_sample)
|
||||
|
||||
prompt = self.curator_prompt.replace("{CURRENT_DATE}", current_date)
|
||||
prompt = self.curator_prompt.replace("{CURRENT_DATE}", current_date_str)
|
||||
full_prompt = f"""{prompt}
|
||||
|
||||
## {'All' if full else 'Recent'} Raw Turns ({'full database' if full else 'last 24 hours'}):
|
||||
## {'All' if is_monthly else 'Recent'} Raw Turns ({'full database' if is_monthly else 'last 24 hours'}):
|
||||
{raw_turns_text}
|
||||
|
||||
## Existing Memories (sample):
|
||||
@@ -152,20 +156,12 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
|
||||
await self.qdrant.delete_points(raw_ids_to_delete)
|
||||
logger.info(f"Deleted {len(raw_ids_to_delete)} processed raw memories")
|
||||
|
||||
logger.info(f"Memory curation completed successfully (full={full})")
|
||||
logger.info(f"Memory curation completed successfully ({mode} mode)")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during curation: {e}")
|
||||
raise
|
||||
|
||||
async def run_full(self):
|
||||
"""Run full curation (all raw memories). Convenience method."""
|
||||
await self.run(full=True)
|
||||
|
||||
async def run_daily(self):
|
||||
"""Run daily curation (recent 24h only). Convenience method."""
|
||||
await self.run(full=False)
|
||||
|
||||
def _is_recent(self, memory: Dict, hours: int = 24) -> bool:
|
||||
"""Check if memory is within the specified hours."""
|
||||
timestamp = memory.get("timestamp", "")
|
||||
@@ -236,7 +232,9 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
json_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', response)
|
||||
# Try to find JSON in code blocks
|
||||
pattern = r'```(?:json)?\s*([\s\S]*?)```'
|
||||
json_match = re.search(pattern, response)
|
||||
if json_match:
|
||||
try:
|
||||
return json.loads(json_match.group(1).strip())
|
||||
@@ -248,7 +246,6 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
|
||||
|
||||
async def _append_rule_to_file(self, filename: str, rule: str):
|
||||
"""Append a permanent rule to a prompts file."""
|
||||
# Try prompts directory first, then static for backward compatibility
|
||||
prompts_path = PROMPTS_DIR / filename
|
||||
static_path = STATIC_DIR / filename
|
||||
|
||||
|
||||
56
app/main.py
56
app/main.py
@@ -20,25 +20,19 @@ curator = None
|
||||
|
||||
|
||||
async def run_curator():
|
||||
"""Scheduled daily curator job (recent 24h)."""
|
||||
"""Scheduled daily curator job.
|
||||
|
||||
Runs every day at configured time. The curator itself detects
|
||||
if it's day 01 (monthly mode) and processes all memories.
|
||||
Otherwise processes recent 24h only.
|
||||
"""
|
||||
global curator
|
||||
logger.info("Starting daily memory curation...")
|
||||
logger.info("Starting memory curation...")
|
||||
try:
|
||||
await curator.run_daily()
|
||||
logger.info("Daily memory curation completed successfully")
|
||||
await curator.run()
|
||||
logger.info("Memory curation completed successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Daily memory curation failed: {e}")
|
||||
|
||||
|
||||
async def run_curator_full():
|
||||
"""Scheduled monthly curator job (full database)."""
|
||||
global curator
|
||||
logger.info("Starting monthly full memory curation...")
|
||||
try:
|
||||
await curator.run_full()
|
||||
logger.info("Monthly full memory curation completed successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Monthly full memory curation failed: {e}")
|
||||
logger.error(f"Memory curation failed: {e}")
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@@ -59,23 +53,12 @@ async def lifespan(app: FastAPI):
|
||||
ollama_host=config.ollama_host
|
||||
)
|
||||
|
||||
# Schedule daily curator (recent 24h)
|
||||
# Schedule daily curator
|
||||
# Note: there is no separate monthly job; Curator.run() detects day 01 (UTC) itself and then processes all raw memories
|
||||
hour, minute = map(int, config.run_time.split(":"))
|
||||
scheduler.add_job(run_curator, "cron", hour=hour, minute=minute, id="daily_curator")
|
||||
logger.info(f"Daily curator scheduled at {config.run_time}")
|
||||
|
||||
# Schedule monthly full curator (all raw memories)
|
||||
full_hour, full_minute = map(int, config.full_run_time.split(":"))
|
||||
scheduler.add_job(
|
||||
run_curator_full,
|
||||
"cron",
|
||||
day=config.full_run_day,
|
||||
hour=full_hour,
|
||||
minute=full_minute,
|
||||
id="monthly_curator"
|
||||
)
|
||||
logger.info(f"Monthly full curator scheduled on day {config.full_run_day} at {config.full_run_time}")
|
||||
|
||||
scheduler.start()
|
||||
|
||||
yield
|
||||
@@ -141,16 +124,11 @@ async def proxy_all(request: Request, path: str):
|
||||
|
||||
|
||||
@app.post("/curator/run")
|
||||
async def trigger_curator(full: bool = False):
|
||||
async def trigger_curator():
|
||||
"""Manually trigger curator.
|
||||
|
||||
Args:
|
||||
full: If True, run full curation (all raw memories).
|
||||
If False (default), run daily curation (recent 24h).
|
||||
The curator will automatically detect if it's day 01 (monthly mode)
|
||||
and process all memories. Otherwise processes recent 24h.
|
||||
"""
|
||||
if full:
|
||||
await run_curator_full()
|
||||
return {"status": "full curation completed"}
|
||||
else:
|
||||
await run_curator()
|
||||
return {"status": "daily curation completed"}
|
||||
await run_curator()
|
||||
return {"status": "curation completed"}
|
||||
|
||||
Reference in New Issue
Block a user