Update docs: watcher fix, plugin capture fix (2026-02-25)

- Fixed bug where the watcher was stuck on an old session (restarted the service)
- Fixed plugin capturing 0 exchanges (added extractMessageText to handle OpenAI content arrays)
- Updated README, session.md, function_check.md, audit_checklist.md
- Verified: 9 exchanges captured per session
This commit is contained in:
root
2026-02-25 12:45:27 -06:00
parent abc5498f60
commit 87a390901d
8 changed files with 1234 additions and 202 deletions

View File

@@ -32,7 +32,7 @@ SCRIPT_DIR = Path(__file__).parent
DEFAULT_CONFIG = SCRIPT_DIR / "curator_config.json"
# Curator prompt path
CURATOR_PROMPT_PATH = Path("/root/.openclaw/workspace/.projects/true-recall-v2/curator-prompt.md")
CURATOR_PROMPT_PATH = Path("/root/.openclaw/workspace/.local_projects/true-recall-v2/curator-prompt.md")
def load_curator_prompt() -> str:
@@ -115,17 +115,38 @@ def extract_gems(memories: List[Dict[str, Any]], ollama_url: str) -> List[Dict[s
if not memories:
return []
prompt = load_curator_prompt()
# Build conversation from memories
# Build conversation from memories (support both 'text' and 'content' fields)
conversation_lines = []
for mem in memories:
role = mem.get("role", "unknown")
content = mem.get("content", "")
if content:
conversation_lines.append(f"{role}: {content}")
for i, mem in enumerate(memories):
# Support both migrated memories (text) and watcher memories (content)
text = mem.get("text", "") or mem.get("content", "")
if text:
# Truncate very long texts
text = text[:500] if len(text) > 500 else text
conversation_lines.append(f"[{i+1}] {text}")
conversation_text = "\n".join(conversation_lines)
conversation_text = "\n\n".join(conversation_lines)
# Simple extraction prompt
prompt = """You are a memory curator. Extract atomic facts from the conversation below.
For each distinct fact/decision/preference, output a JSON object with:
- "text": the atomic fact (1-2 sentences)
- "category": one of [decision, preference, technical, project, knowledge, system]
- "importance": "high" or "medium"
Return ONLY a JSON array. Example:
[
{"text": "User decided to use Redis for caching", "category": "decision", "importance": "high"},
{"text": "User prefers dark mode", "category": "preference", "importance": "medium"}
]
If no extractable facts, return [].
CONVERSATION:
"""
full_prompt = f"{prompt}{conversation_text}\n\nJSON:"
try:
response = requests.post(
@@ -133,7 +154,7 @@ def extract_gems(memories: List[Dict[str, Any]], ollama_url: str) -> List[Dict[s
json={
"model": "qwen3:30b-a3b-instruct-2507-q8_0",
"system": prompt,
"prompt": f"## Input Conversation\n\n{conversation_text}\n\n## Output\n",
"prompt": full_prompt,
"stream": False,
"options": {
"temperature": 0.1,
@@ -157,37 +178,17 @@ def extract_gems(memories: List[Dict[str, Any]], ollama_url: str) -> List[Dict[s
output = output.split('```')[1].split('```')[0].strip()
try:
# Find JSON array in output
start_idx = output.find('[')
end_idx = output.rfind(']')
if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
output = output[start_idx:end_idx+1]
# Fix common JSON issues from LLM output
# Replace problematic escape sequences
output = output.replace('\\n', '\n').replace('\\t', '\t')
# Fix single quotes in content that break JSON
output = output.replace("\\'", "'")
gems = json.loads(output)
if not isinstance(gems, list):
gems = [gems] if gems else []
return gems
except json.JSONDecodeError as e:
# Try to extract gems with regex fallback
import re
gem_matches = re.findall(r'"gem"\s*:\s*"([^"]+)"', output)
if gem_matches:
gems = []
for gem_text in gem_matches:
gems.append({
"gem": gem_text,
"context": "Extracted via fallback",
"categories": ["extracted"],
"importance": 3,
"confidence": 0.7
})
print(f"⚠️ Fallback extraction: {len(gems)} gems", file=sys.stderr)
return gems
print(f"Error parsing curator output: {e}", file=sys.stderr)
print(f"Raw output: {repr(output[:500])}...", file=sys.stderr)
return []
@@ -198,7 +199,7 @@ def get_embedding(text: str, ollama_url: str) -> Optional[List[float]]:
try:
response = requests.post(
f"{ollama_url}/api/embeddings",
json={"model": "mxbai-embed-large", "prompt": text},
json={"model": "snowflake-arctic-embed2", "prompt": text},
timeout=30
)
response.raise_for_status()
@@ -210,10 +211,19 @@ def get_embedding(text: str, ollama_url: str) -> Optional[List[float]]:
def store_gem(gem: Dict[str, Any], user_id: str, qdrant_url: str, target_collection: str, ollama_url: str) -> bool:
"""Store a single gem to Qdrant."""
embedding_text = f"{gem.get('gem', '')} {gem.get('context', '')} {gem.get('snippet', '')}"
# Support both old format (gem, context, snippet) and new format (text, category, importance)
embedding_text = gem.get('text', '') or gem.get('gem', '')
if not embedding_text:
embedding_text = f"{gem.get('gem', '')} {gem.get('context', '')} {gem.get('snippet', '')}".strip()
if not embedding_text:
print(f"⚠️ Empty embedding text for gem, skipping", file=sys.stderr)
return False
vector = get_embedding(embedding_text, ollama_url)
if vector is None:
print(f"⚠️ Failed to get embedding for gem", file=sys.stderr)
return False
# Generate ID
@@ -221,11 +231,18 @@ def store_gem(gem: Dict[str, Any], user_id: str, qdrant_url: str, target_collect
hash_bytes = hashlib.sha256(hash_content.encode()).digest()[:8]
gem_id = int.from_bytes(hash_bytes, byteorder='big') % (2**63)
# Normalize gem fields - ensure we have text field
payload = {
"user_id": user_id,
**gem,
"text": gem.get('text', gem.get('gem', '')),
"category": gem.get('category', 'general'),
"importance": gem.get('importance', 'medium'),
"curated_at": datetime.now(timezone.utc).isoformat()
}
# Preserve any other fields from gem
for key in ['context', 'snippet', 'confidence', 'conversation_id', 'turn_range']:
if key in gem:
payload[key] = gem[key]
try:
response = requests.put(