Update docs: watcher fix, plugin capture fix (2026-02-25)

- Fixed bug where the watcher was stuck on an old session (restarted the service)
- Fixed plugin capturing 0 exchanges (added extractMessageText to handle OpenAI content arrays)
- Updated README, session.md, function_check.md, audit_checklist.md
- Verified: 9 exchanges captured per session
This commit is contained in:
root
2026-02-25 12:45:27 -06:00
parent abc5498f60
commit 87a390901d
8 changed files with 1234 additions and 202 deletions

View File

@@ -32,7 +32,7 @@ SCRIPT_DIR = Path(__file__).parent
DEFAULT_CONFIG = SCRIPT_DIR / "curator_config.json"
# Curator prompt path
CURATOR_PROMPT_PATH = Path("/root/.openclaw/workspace/.projects/true-recall-v2/curator-prompt.md")
CURATOR_PROMPT_PATH = Path("/root/.openclaw/workspace/.local_projects/true-recall-v2/curator-prompt.md")
def load_curator_prompt() -> str:
@@ -115,17 +115,38 @@ def extract_gems(memories: List[Dict[str, Any]], ollama_url: str) -> List[Dict[s
if not memories:
return []
prompt = load_curator_prompt()
# Build conversation from memories
# Build conversation from memories (support both 'text' and 'content' fields)
conversation_lines = []
for mem in memories:
role = mem.get("role", "unknown")
content = mem.get("content", "")
if content:
conversation_lines.append(f"{role}: {content}")
for i, mem in enumerate(memories):
# Support both migrated memories (text) and watcher memories (content)
text = mem.get("text", "") or mem.get("content", "")
if text:
# Truncate very long texts
text = text[:500] if len(text) > 500 else text
conversation_lines.append(f"[{i+1}] {text}")
conversation_text = "\n".join(conversation_lines)
conversation_text = "\n\n".join(conversation_lines)
# Simple extraction prompt
prompt = """You are a memory curator. Extract atomic facts from the conversation below.
For each distinct fact/decision/preference, output a JSON object with:
- "text": the atomic fact (1-2 sentences)
- "category": one of [decision, preference, technical, project, knowledge, system]
- "importance": "high" or "medium"
Return ONLY a JSON array. Example:
[
{"text": "User decided to use Redis for caching", "category": "decision", "importance": "high"},
{"text": "User prefers dark mode", "category": "preference", "importance": "medium"}
]
If no extractable facts, return [].
CONVERSATION:
"""
full_prompt = f"{prompt}{conversation_text}\n\nJSON:"
try:
response = requests.post(
@@ -133,7 +154,7 @@ def extract_gems(memories: List[Dict[str, Any]], ollama_url: str) -> List[Dict[s
json={
"model": "qwen3:30b-a3b-instruct-2507-q8_0",
"system": prompt,
"prompt": f"## Input Conversation\n\n{conversation_text}\n\n## Output\n",
"prompt": full_prompt,
"stream": False,
"options": {
"temperature": 0.1,
@@ -157,37 +178,17 @@ def extract_gems(memories: List[Dict[str, Any]], ollama_url: str) -> List[Dict[s
output = output.split('```')[1].split('```')[0].strip()
try:
# Find JSON array in output
start_idx = output.find('[')
end_idx = output.rfind(']')
if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
output = output[start_idx:end_idx+1]
# Fix common JSON issues from LLM output
# Replace problematic escape sequences
output = output.replace('\\n', '\n').replace('\\t', '\t')
# Fix single quotes in content that break JSON
output = output.replace("\\'", "'")
gems = json.loads(output)
if not isinstance(gems, list):
gems = [gems] if gems else []
return gems
except json.JSONDecodeError as e:
# Try to extract gems with regex fallback
import re
gem_matches = re.findall(r'"gem"\s*:\s*"([^"]+)"', output)
if gem_matches:
gems = []
for gem_text in gem_matches:
gems.append({
"gem": gem_text,
"context": "Extracted via fallback",
"categories": ["extracted"],
"importance": 3,
"confidence": 0.7
})
print(f"⚠️ Fallback extraction: {len(gems)} gems", file=sys.stderr)
return gems
print(f"Error parsing curator output: {e}", file=sys.stderr)
print(f"Raw output: {repr(output[:500])}...", file=sys.stderr)
return []
@@ -198,7 +199,7 @@ def get_embedding(text: str, ollama_url: str) -> Optional[List[float]]:
try:
response = requests.post(
f"{ollama_url}/api/embeddings",
json={"model": "mxbai-embed-large", "prompt": text},
json={"model": "snowflake-arctic-embed2", "prompt": text},
timeout=30
)
response.raise_for_status()
@@ -210,10 +211,19 @@ def get_embedding(text: str, ollama_url: str) -> Optional[List[float]]:
def store_gem(gem: Dict[str, Any], user_id: str, qdrant_url: str, target_collection: str, ollama_url: str) -> bool:
"""Store a single gem to Qdrant."""
embedding_text = f"{gem.get('gem', '')} {gem.get('context', '')} {gem.get('snippet', '')}"
# Support both old format (gem, context, snippet) and new format (text, category, importance)
embedding_text = gem.get('text', '') or gem.get('gem', '')
if not embedding_text:
embedding_text = f"{gem.get('gem', '')} {gem.get('context', '')} {gem.get('snippet', '')}".strip()
if not embedding_text:
print(f"⚠️ Empty embedding text for gem, skipping", file=sys.stderr)
return False
vector = get_embedding(embedding_text, ollama_url)
if vector is None:
print(f"⚠️ Failed to get embedding for gem", file=sys.stderr)
return False
# Generate ID
@@ -221,11 +231,18 @@ def store_gem(gem: Dict[str, Any], user_id: str, qdrant_url: str, target_collect
hash_bytes = hashlib.sha256(hash_content.encode()).digest()[:8]
gem_id = int.from_bytes(hash_bytes, byteorder='big') % (2**63)
# Normalize gem fields - ensure we have text field
payload = {
"user_id": user_id,
**gem,
"text": gem.get('text', gem.get('gem', '')),
"category": gem.get('category', 'general'),
"importance": gem.get('importance', 'medium'),
"curated_at": datetime.now(timezone.utc).isoformat()
}
# Preserve any other fields from gem
for key in ['context', 'snippet', 'confidence', 'conversation_id', 'turn_range']:
if key in gem:
payload[key] = gem[key]
try:
response = requests.put(