Initial commit: TrueRecall v2.2 with 30b curator and timer-based curation

This commit is contained in:
root
2026-02-24 20:27:44 -06:00
commit 8bb1abaf18
23 changed files with 4112 additions and 0 deletions

101
tr-continuous/README.md Normal file
View File

@@ -0,0 +1,101 @@
# Turn-Based Curator
Extract gems every N turns instead of waiting for daily curation.
## Files
| File | Purpose |
|------|---------|
| `curator_turn_based.py` | Main script - checks turn count, extracts gems |
| `curator_cron.sh` | Cron wrapper to run every minute |
| `turn-curator.service` | Alternative systemd service (runs on-demand) |
## Usage
### Manual Run
```bash
# Check current status
python3 curator_turn_based.py --status
# Preview what would be curated
python3 curator_turn_based.py --threshold 10 --dry-run
# Execute curation
python3 curator_turn_based.py --threshold 10 --execute
```
### Automatic (Cron)
Add to crontab:
```bash
* * * * * /root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous/curator_cron.sh
```
Or use systemd timer:
```bash
sudo cp turn-curator.service /etc/systemd/system/
sudo systemctl enable turn-curator.timer  # requires a matching turn-curator.timer unit (not included in this repo)
```
### Automatic (Integrated)
Alternative: Modify `realtime_qdrant_watcher.py` to trigger curation every 10 turns.
## How It Works
1. **Tracks turn count** - Stores last curation turn in `/tmp/curator_turn_state.json`
2. **Monitors delta** - Compares current turn count vs last curation
3. **Triggers at threshold** - When 10+ new turns exist, runs curation
4. **Extracts gems** - Sends conversation to qwen3, gets gems
5. **Stores results** - Saves gems to `gems_tr` collection
## State File
`/tmp/curator_turn_state.json`:
```json
{
"last_turn": 150,
"last_curation": "2026-02-24T17:00:00Z"
}
```
## Comparison with Daily Curator
| Feature | Daily Curator | Turn-Based Curator |
|---------|--------------|-------------------|
| Schedule | 2:45 AM daily | Every 10 turns (dynamic) |
| Time window | 24 hours | Variable (depends on chat frequency) |
| Trigger | Cron | Turn threshold |
| Use case | Nightly batch | Real-time-ish extraction |
| Overlap | Low | Possible with daily curator |
## Recommendation
Use **BOTH**:
- **Turn-based**: Every 10 turns for active conversations
- **Daily**: 2:45 AM as backup/catch-all
They'll deduplicate automatically: each gem's point ID is derived from its text, so a re-extracted gem overwrites the existing point instead of creating a duplicate.
## Testing
```bash
# Simulate 10 turns (placeholder: this loop discards its output and does NOT
# write to memories_tr — generate real turns via the watcher for an end-to-end test)
for i in {1..10}; do
echo "Test message $i" > /dev/null
done
# Check status
python3 curator_turn_based.py --status
# Run manually
python3 curator_turn_based.py --threshold 10 --execute
```
## Status
- ✅ Script created: `curator_turn_based.py`
- ✅ Cron wrapper: `curator_cron.sh`
- ⏳ Deployment: Optional (manual or cron)
- ⏳ Testing: Pending

View File

@@ -0,0 +1,194 @@
#!/usr/bin/env python3
"""
Turn-Based Curator: Extract gems every N new memories (turns).
Usage:
python3 curator_by_count.py --threshold 10 --dry-run
python3 curator_by_count.py --threshold 10 --execute
python3 curator_by_count.py --status
"""
import argparse
import json
import requests
import sys
from datetime import datetime, timezone, timedelta
from pathlib import Path
# Qdrant instance holding both the raw turns and the curated gems.
QDRANT_URL = "http://10.0.0.40:6333"
MEMORIES = "memories_tr"  # source collection (raw conversation turns)
GEMS = "gems_tr"          # target collection (extracted gems)
# Ollama endpoint used for both chat completions and embeddings.
OLLAMA = "http://10.0.0.10:11434"
# NOTE(review): the "ollama-remote/" prefix looks like a proxy/LiteLLM route —
# confirm the /v1/chat/completions endpoint accepts this model name verbatim.
MODEL = "ollama-remote/qwen3:30b-a3b-instruct-2507-q8_0"
# Records how many points had been curated as of the last run.
STATE_FILE = Path("/tmp/curator_count_state.json")
def load_state():
    """Load curation state from STATE_FILE.

    Returns a dict with "last_count" (points curated so far) and
    "last_time" (ISO timestamp of the last run). Falls back to a fresh
    default when the file is missing, unreadable, or corrupt — the
    previous version crashed the cron run on invalid JSON.
    """
    if STATE_FILE.exists():
        try:
            with open(STATE_FILE) as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            # Corrupt/unreadable state: restart counting rather than crash.
            pass
    return {"last_count": 0, "last_time": None}
def save_state(state):
    """Persist curation state to STATE_FILE as JSON."""
    STATE_FILE.write_text(json.dumps(state))
def get_total_count():
    """Return the total number of points in the memories collection (0 on error).

    The previous bare `except:` swallowed even NameError and
    KeyboardInterrupt; only transport/shape errors should map to 0.
    """
    try:
        r = requests.get(f"{QDRANT_URL}/collections/{MEMORIES}", timeout=10)
        r.raise_for_status()
        return r.json()["result"]["points_count"]
    except (requests.RequestException, KeyError, ValueError):
        return 0
def get_recent_memories(hours=1):
    """Get memories stored within the last *hours* hours.

    NOTE(review): scroll returns points in arbitrary order and only the
    first 1000 are fetched, so on very large collections recent points
    could be missed — confirm the collection stays well under that size.
    Returns [] on any transport or response-shape error (best-effort,
    matching the cron use case), instead of the previous bare `except:`.
    """
    since = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
    try:
        r = requests.post(
            f"{QDRANT_URL}/collections/{MEMORIES}/points/scroll",
            json={"limit": 1000, "with_payload": True},
            timeout=30,
        )
        r.raise_for_status()
        points = r.json()["result"]["points"]
    except (requests.RequestException, KeyError, ValueError):
        return []
    # Client-side filter; ISO-8601 UTC strings compare lexicographically.
    return [p for p in points if p.get("payload", {}).get("timestamp", "") > since]
def extract_gems(memories):
    """Send up to 20 memories to the LLM and parse the returned gem list.

    Args:
        memories: Qdrant points with payload "role"/"content" fields.

    Returns:
        A list of {"text": ..., "category": ...} dicts; [] on any failure.
    """
    if not memories:
        return []
    # Build the conversation text. Truncate the memory list *before*
    # formatting so we don't format messages that are then discarded,
    # and cap per-message length to keep the prompt bounded.
    parts = []
    for m in memories[:20]:
        role = m["payload"].get("role", "unknown")
        content = m["payload"].get("content", "")[:500]
        parts.append(f"{role.upper()}: {content}")
    conversation = "\n\n".join(parts)
    prompt = f"""Extract 3-5 key gems (insights, decisions, facts) from this conversation.
Conversation:
{conversation}
Return JSON: [{{"text": "gem", "category": "decision|fact|preference"}}]"""
    try:
        r = requests.post(
            f"{OLLAMA}/v1/chat/completions",
            json={
                "model": MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.3,
            },
            timeout=120,
        )
        r.raise_for_status()
        content = r.json()["choices"][0]["message"]["content"]
        # Pull the outermost JSON array out of the reply.
        start = content.find('[')
        end = content.rfind(']')
        if start >= 0 and end > start:
            return json.loads(content[start:end + 1])
    except (requests.RequestException, KeyError, IndexError, ValueError):
        # Best-effort: any transport or parse failure yields "no gems"
        # (narrowed from the previous bare `except:`).
        pass
    return []
def store_gem(gem):
    """Embed a gem's text and upsert it into the gems collection.

    Returns True on success. BUGFIX: the point ID is now derived from a
    SHA-256 of the gem text — the previous `abs(hash(...))` ID changes
    between interpreter runs (PYTHONHASHSEED), so identical gems were
    re-inserted under fresh IDs instead of overwriting, defeating dedup.
    """
    import hashlib  # local import: keeps this fix self-contained

    try:
        # Embed the gem text.
        r = requests.post(
            f"{OLLAMA}/api/embeddings",
            json={"model": "snowflake-arctic-embed2", "prompt": gem["text"]},
            timeout=30,
        )
        r.raise_for_status()
        vector = r.json()["embedding"]
        # Deterministic 63-bit ID so identical gem text upserts the same point.
        digest = hashlib.sha256(gem["text"].encode("utf-8")).digest()[:8]
        point_id = int.from_bytes(digest, "big") % (2**63)
        r = requests.put(
            f"{QDRANT_URL}/collections/{GEMS}/points",
            json={
                "points": [{
                    "id": point_id,
                    "vector": vector,
                    "payload": {
                        "text": gem["text"],
                        "category": gem.get("category", "other"),
                        "createdAt": datetime.now(timezone.utc).isoformat(),
                        "source": "turn_curator",
                    },
                }]
            },
            timeout=30,
        )
        return r.status_code == 200
    except (requests.RequestException, KeyError, ValueError):
        return False
def main():
    """CLI entry point: check the new-memory delta and curate when over threshold."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--threshold", "-t", type=int, default=10)
    parser.add_argument("--execute", "-e", action="store_true")
    parser.add_argument("--dry-run", "-n", action="store_true")
    parser.add_argument("--status", "-s", action="store_true")
    opts = parser.parse_args()

    state = load_state()
    total = get_total_count()
    delta = total - state.get("last_count", 0)

    if opts.status:
        # Report-only mode: show counters and readiness, then exit.
        for line in (
            f"Total memories: {total}",
            f"Last curated: {state.get('last_count', 0)}",
            f"New since last: {delta}",
            f"Threshold: {opts.threshold}",
            f"Ready: {'YES' if delta >= opts.threshold else 'NO'}",
        ):
            print(line)
        return

    print(f"Curator: {delta} new / {opts.threshold} threshold")
    if delta < opts.threshold:
        print("Not enough new memories")
        return

    # The last hour of memories should cover the new points.
    batch = get_recent_memories(hours=1)
    print(f"Fetched {len(batch)} recent memories")
    if not batch:
        print("No memories to process")
        return
    if opts.dry_run:
        print(f"[DRY RUN] Would process {len(batch)} memories")
        return
    if not opts.execute:
        print("Use --execute to run or --dry-run to preview")
        return

    print("Extracting gems...")
    gems = extract_gems(batch)
    print(f"Extracted {len(gems)} gems")

    stored = 0
    for gem in gems:
        if store_gem(gem):
            stored += 1
            print(f"  Stored: {gem['text'][:60]}...")

    # Record where this run ended so the next one measures a fresh delta.
    state["last_count"] = total
    state["last_time"] = datetime.now(timezone.utc).isoformat()
    save_state(state)
    print(f"Done: {stored}/{len(gems)} gems stored")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,7 @@
{
"timer_minutes": 5,
"max_batch_size": 100,
"user_id": "rob",
"source_collection": "memories_tr",
"target_collection": "gems_tr"
}

View File

@@ -0,0 +1,12 @@
#!/bin/bash
# Turn-based curator cron wrapper.
# Runs every minute; fires a curation pass once the turn threshold is reached.
SCRIPT_DIR="/root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous"
CURATOR="${SCRIPT_DIR}/curator_turn_based.py"

# Only curate when the status report says enough new turns have accumulated.
if /usr/bin/python3 "$CURATOR" --threshold 10 --status 2>/dev/null | grep -q "Ready to curate: YES"; then
    /usr/bin/python3 "$CURATOR" --threshold 10 --execute 2>&1 | logger -t turn-curator
fi

350
tr-continuous/curator_timer.py Executable file
View File

@@ -0,0 +1,350 @@
#!/usr/bin/env python3
"""
TrueRecall Timer Curator: Runs every 30 minutes via cron.
- Queries all uncurated memories from memories_tr
- Sends batch to qwen3 for gem extraction
- Stores gems to gems_tr
- Marks processed memories as curated=true
Usage:
python3 curator_timer.py --config curator_config.json
python3 curator_timer.py --config curator_config.json --dry-run
"""
import os
import sys
import json
import argparse
import requests
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Dict, Any, Optional
import hashlib
# Load config
# Load config
def load_config(config_path: str) -> Dict[str, Any]:
    """Read and parse a JSON config file into a dict."""
    return json.loads(Path(config_path).read_text())
# Default paths
# Resolve the default config relative to this script so cron's CWD doesn't matter.
SCRIPT_DIR = Path(__file__).parent
DEFAULT_CONFIG = SCRIPT_DIR / "curator_config.json"
# Curator prompt path
CURATOR_PROMPT_PATH = Path("/root/.openclaw/workspace/.projects/true-recall-v2/curator-prompt.md")

# Built-in prompt used when the on-disk prompt file is absent.
_DEFAULT_PROMPT = """You are The Curator. Extract meaningful gems from conversation history.
Extract facts, insights, decisions, preferences, and context that would be valuable to remember.
Output a JSON array of gems with fields: gem, context, snippet, categories, importance (1-5), confidence (0-0.99)."""


def load_curator_prompt() -> str:
    """Return the curator system prompt, preferring the on-disk file."""
    try:
        return CURATOR_PROMPT_PATH.read_text()
    except FileNotFoundError:
        print(f"⚠️ Curator prompt not found at {CURATOR_PROMPT_PATH}")
        return _DEFAULT_PROMPT
def get_uncurated_memories(qdrant_url: str, collection: str, user_id: str, max_batch: int) -> List[Dict[str, Any]]:
    """Scroll Qdrant for up to *max_batch* memories with curated=False for *user_id*.

    Returns flattened dicts (id + known fields + raw payload). [] on error.
    """
    filter_data = {
        "must": [
            {"key": "user_id", "match": {"value": user_id}},
            {"key": "curated", "match": {"value": False}},
        ]
    }
    all_points = []
    offset = None
    iterations = 0
    max_iterations = 10  # hard cap so a misbehaving cursor can't loop forever
    while len(all_points) < max_batch and iterations < max_iterations:
        iterations += 1
        scroll_data = {
            "limit": min(100, max_batch - len(all_points)),
            "with_payload": True,
            "filter": filter_data,
        }
        # BUGFIX: next_page_offset can be a falsy-but-valid cursor (e.g.
        # integer point id 0); compare against None, not truthiness.
        if offset is not None:
            scroll_data["offset"] = offset
        try:
            response = requests.post(
                f"{qdrant_url}/collections/{collection}/points/scroll",
                json=scroll_data,
                headers={"Content-Type": "application/json"},
                timeout=30,
            )
            response.raise_for_status()
            result = response.json()
            points = result.get("result", {}).get("points", [])
            if not points:
                break
            all_points.extend(points)
            offset = result.get("result", {}).get("next_page_offset")
            if offset is None:
                break
        except Exception as e:
            print(f"Error querying Qdrant: {e}", file=sys.stderr)
            break
    # Flatten: explicit fields first, then the raw payload (payload wins on clash).
    memories = []
    for point in all_points:
        payload = point.get("payload", {})
        memories.append({
            "id": point.get("id"),
            "content": payload.get("content", ""),
            "role": payload.get("role", ""),
            "timestamp": payload.get("timestamp", ""),
            "turn": payload.get("turn", 0),
            **payload,
        })
    return memories[:max_batch]
def extract_gems(memories: List[Dict[str, Any]], ollama_url: str) -> List[Dict[str, Any]]:
    """Send memories to the curator model and parse the returned gem list.

    Args:
        memories: Memory dicts with at least "role" and "content" keys.
        ollama_url: Base URL of the Ollama server.

    Returns:
        A list of gem dicts; [] on any failure.
    """
    if not memories:
        return []
    prompt = load_curator_prompt()
    # Build a plain-text transcript from the memories.
    conversation_lines = [
        f"{mem.get('role', 'unknown')}: {mem.get('content', '')}"
        for mem in memories
        if mem.get("content", "")
    ]
    conversation_text = "\n".join(conversation_lines)
    try:
        response = requests.post(
            f"{ollama_url}/api/generate",
            json={
                "model": "qwen3:30b-a3b-instruct-2507-q8_0",
                "system": prompt,
                "prompt": f"## Input Conversation\n\n{conversation_text}\n\n## Output\n",
                "stream": False,
                "options": {
                    "temperature": 0.1,
                    "num_predict": 4000,
                },
            },
            timeout=120,
        )
        response.raise_for_status()
    except Exception as e:
        print(f"Error calling Ollama: {e}", file=sys.stderr)
        return []
    output = response.json().get('response', '').strip()
    # Strip markdown code fences, if any.
    if '```json' in output:
        output = output.split('```json')[1].split('```')[0].strip()
    elif '```' in output:
        output = output.split('```')[1].split('```')[0].strip()
    # Narrow to the outermost JSON array.
    start_idx = output.find('[')
    end_idx = output.rfind(']')
    if start_idx != -1 and end_idx > start_idx:
        output = output[start_idx:end_idx + 1]
    # BUGFIX: the old code unconditionally rewrote "\\n" -> literal newline
    # before parsing, which *corrupted valid JSON* (raw control characters
    # are illegal inside JSON strings). Parse as-is first; only apply the
    # legacy massaging for sloppy LLM output as a second attempt.
    candidates = (
        output,
        output.replace('\\n', '\n').replace('\\t', '\t').replace("\\'", "'"),
    )
    for candidate in candidates:
        try:
            gems = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if not isinstance(gems, list):
            gems = [gems] if gems else []
        return gems
    # Last resort: pull "gem" values out with a regex.
    import re
    gem_matches = re.findall(r'"gem"\s*:\s*"([^"]+)"', output)
    if gem_matches:
        gems = [{
            "gem": gem_text,
            "context": "Extracted via fallback",
            "categories": ["extracted"],
            "importance": 3,
            "confidence": 0.7,
        } for gem_text in gem_matches]
        print(f"⚠️ Fallback extraction: {len(gems)} gems", file=sys.stderr)
        return gems
    print(f"Error parsing curator output: could not decode JSON", file=sys.stderr)
    print(f"Raw output: {repr(output[:500])}...", file=sys.stderr)
    return []
def get_embedding(text: str, ollama_url: str) -> Optional[List[float]]:
    """Fetch an embedding vector for *text* from Ollama; None on any failure."""
    request_body = {"model": "mxbai-embed-large", "prompt": text}
    try:
        resp = requests.post(
            f"{ollama_url}/api/embeddings",
            json=request_body,
            timeout=30,
        )
        resp.raise_for_status()
        return resp.json()['embedding']
    except Exception as e:
        print(f"Error getting embedding: {e}", file=sys.stderr)
        return None
def store_gem(gem: Dict[str, Any], user_id: str, qdrant_url: str, target_collection: str, ollama_url: str) -> bool:
    """Embed and upsert a single gem into the target collection.

    The point ID is a stable SHA-256-derived 63-bit integer of
    user/conversation/turn-range/gem-prefix, so re-running the curator on
    the same gem upserts the same point. Returns True on success.
    """
    embedding_text = " ".join([
        gem.get('gem', ''),
        gem.get('context', ''),
        gem.get('snippet', ''),
    ])
    vector = get_embedding(embedding_text, ollama_url)
    if vector is None:
        return False
    # Stable ID derived from identifying fields of the gem.
    id_source = f"{user_id}:{gem.get('conversation_id', '')}:{gem.get('turn_range', '')}:{gem.get('gem', '')[:50]}"
    digest = hashlib.sha256(id_source.encode()).digest()[:8]
    gem_id = int.from_bytes(digest, byteorder='big') % (2**63)
    request_body = {
        "points": [{
            "id": abs(gem_id),
            "vector": vector,
            "payload": {
                "user_id": user_id,
                **gem,
                "curated_at": datetime.now(timezone.utc).isoformat(),
            },
        }]
    }
    try:
        resp = requests.put(
            f"{qdrant_url}/collections/{target_collection}/points",
            json=request_body,
            timeout=30,
        )
        resp.raise_for_status()
    except Exception as e:
        print(f"Error storing gem: {e}", file=sys.stderr)
        return False
    return True
def mark_curated(memory_ids: List, qdrant_url: str, collection: str) -> bool:
    """Set curated=True (plus a timestamp) on the given memory point IDs.

    Uses POST /collections/{name}/points/payload with
    {"points": [ids], "payload": {...}}. Empty input is a no-op success.
    """
    if not memory_ids:
        return True
    request_body = {
        "points": memory_ids,
        "payload": {
            "curated": True,
            "curated_at": datetime.now(timezone.utc).isoformat(),
        },
    }
    try:
        resp = requests.post(
            f"{qdrant_url}/collections/{collection}/points/payload",
            json=request_body,
            timeout=30,
        )
        resp.raise_for_status()
    except Exception as e:
        print(f"Error marking curated: {e}", file=sys.stderr)
        return False
    return True
def main():
    """Entry point: fetch uncurated memories, extract gems, store them, mark curated."""
    parser = argparse.ArgumentParser(description="TrueRecall Timer Curator")
    parser.add_argument("--config", "-c", default=str(DEFAULT_CONFIG), help="Config file path")
    parser.add_argument("--dry-run", "-n", action="store_true", help="Don't write, just preview")
    args = parser.parse_args()
    config = load_config(args.config)
    # Service endpoints come from the environment (with LAN defaults);
    # user/collection/batch settings come from the JSON config.
    qdrant_url = os.getenv("QDRANT_URL", "http://10.0.0.40:6333")
    ollama_url = os.getenv("OLLAMA_URL", "http://10.0.0.10:11434")
    user_id = config.get("user_id", "rob")
    source_collection = config.get("source_collection", "memories_tr")
    target_collection = config.get("target_collection", "gems_tr")
    max_batch = config.get("max_batch_size", 100)
    print(f"🔍 TrueRecall Timer Curator")
    print(f"👤 User: {user_id}")
    print(f"📥 Source: {source_collection}")
    print(f"💎 Target: {target_collection}")
    print(f"📦 Max batch: {max_batch}")
    if args.dry_run:
        print("🏃 DRY RUN MODE")
    print()
    # Get uncurated memories
    print("📥 Fetching uncurated memories...")
    memories = get_uncurated_memories(qdrant_url, source_collection, user_id, max_batch)
    print(f"✅ Found {len(memories)} uncurated memories")
    if not memories:
        print("🤷 Nothing to curate. Exiting.")
        return
    # Extract gems
    print(f"\n🧠 Sending {len(memories)} memories to curator...")
    gems = extract_gems(memories, ollama_url)
    print(f"✅ Extracted {len(gems)} gems")
    if not gems:
        print("⚠️ No gems extracted. Nothing to store.")
        # Still mark as curated so we don't reprocess the same batch next run.
        memory_ids = [m["id"] for m in memories]  # Keep as integers
        mark_curated(memory_ids, qdrant_url, source_collection)
        return
    # Preview the first few gems before writing anything.
    print("\n💎 Gems preview:")
    for i, gem in enumerate(gems[:3], 1):
        print(f"  {i}. {gem.get('gem', 'N/A')[:80]}...")
    if len(gems) > 3:
        print(f"  ... and {len(gems) - 3} more")
    if args.dry_run:
        print("\n🏃 DRY RUN: Not storing gems or marking curated.")
        return
    # Store gems
    print(f"\n💾 Storing {len(gems)} gems...")
    stored = 0
    for gem in gems:
        if store_gem(gem, user_id, qdrant_url, target_collection, ollama_url):
            stored += 1
    print(f"✅ Stored: {stored}/{len(gems)}")
    # Mark memories as curated so the next run starts where this one ended.
    print("\n📝 Marking memories as curated...")
    memory_ids = [m["id"] for m in memories]  # Keep as integers
    if mark_curated(memory_ids, qdrant_url, source_collection):
        print(f"✅ Marked {len(memory_ids)} memories as curated")
    else:
        print(f"⚠️ Failed to mark some memories as curated")
    print("\n🎉 Curation complete!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,291 @@
#!/usr/bin/env python3
"""
Turn-Based Curator: Extract gems every N turns (instead of daily).
Usage:
python3 curator_turn_based.py --threshold 10 --dry-run
python3 curator_turn_based.py --threshold 10 --execute
python3 curator_turn_based.py --status # Show turn counts
This tracks turn count since last curation and runs when threshold is reached.
"""
import argparse
import json
import os
import requests
import sys
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import List, Dict, Any, Optional
# Config
# Qdrant instance holding raw turns (memories) and curated gems.
QDRANT_URL = "http://10.0.0.40:6333"
MEMORIES_COLLECTION = "memories_tr"
GEMS_COLLECTION = "gems_tr"
# Ollama endpoint (chat completions + embeddings).
OLLAMA_URL = "http://10.0.0.10:11434"
# NOTE(review): the "ollama-remote/" prefix looks like a proxy route name —
# confirm the OpenAI-compatible endpoint accepts it verbatim.
CURATOR_MODEL = "ollama-remote/qwen3:30b-a3b-instruct-2507-q8_0"
# State file tracks last curation (turn number + timestamp).
STATE_FILE = Path("/tmp/curator_turn_state.json")
def get_curator_prompt(conversation_text: str) -> str:
    """Build the gem-extraction prompt around *conversation_text*.

    The prompt instructs the model to return ONLY a JSON array of
    {"text", "category"} objects (categories: preference, fact,
    decision, entity, other), capped at 3-5 gems.
    """
    return f"""You are a memory curator. Extract only the most valuable gems (key insights) from this conversation.
Rules:
1. Extract only genuinely important information (decisions, preferences, key facts)
2. Skip transient/trivial content (greetings, questions, temporary requests)
3. Each gem should be self-contained and useful for future context
4. Format: concise, factual statements
5. Max 3-5 gems total
Conversation to curate:
---
{conversation_text}
---
Return ONLY a JSON array of gems like:
[{{"text": "User decided to use X approach for Y", "category": "decision"}}]
Categories: preference, fact, decision, entity, other
JSON:"""
def load_state() -> Dict[str, Any]:
    """Load curation state (last curated turn + timestamp) from STATE_FILE.

    Falls back to a fresh default when the file is missing, unreadable,
    or contains invalid JSON. Narrowed from a bare `except:` so real
    programming errors are no longer silently swallowed.
    """
    if STATE_FILE.exists():
        try:
            with open(STATE_FILE) as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            # Corrupt state file: restart counting rather than crash the cron job.
            pass
    return {"last_turn": 0, "last_curation": None}
def save_state(state: Dict[str, Any]):
    """Persist curation state to STATE_FILE as pretty-printed JSON."""
    STATE_FILE.write_text(json.dumps(state, indent=2))
def get_point_count_since(last_time: str) -> int:
    """Count memory points whose "timestamp" payload field exceeds *last_time*.

    Returns 0 on any error.

    NOTE(review): this passes a string bound to a Qdrant range filter —
    range filters expect numeric/datetime-indexed fields, so confirm the
    collection indexes timestamps accordingly.
    """
    count_filter = {
        "must": [
            {"key": "timestamp", "range": {"gt": last_time}},
        ]
    }
    try:
        resp = requests.post(
            f"{QDRANT_URL}/collections/{MEMORIES_COLLECTION}/points/count",
            json={"filter": count_filter},
            timeout=30,
        )
        resp.raise_for_status()
        return resp.json().get("result", {}).get("count", 0)
    except Exception as e:
        print(f"Error getting count: {e}", file=sys.stderr)
        return 0
def get_turns_since(last_turn: int, limit: int = 100) -> List[Dict[str, Any]]:
    """Fetch memory points with payload turn > *last_turn*, sorted by turn.

    BUGFIX: the previous version issued a single scroll request, so on
    collections larger than *limit* the (arbitrarily ordered) first page
    could miss the newest turns entirely. We now follow next_page_offset
    until enough matching turns are collected or the collection is
    exhausted, with a hard iteration cap as a safety valve.
    """
    turns: List[Dict[str, Any]] = []
    offset = None
    for _ in range(50):  # safety cap on pagination
        body: Dict[str, Any] = {"limit": limit, "with_payload": True}
        if offset is not None:
            body["offset"] = offset
        try:
            response = requests.post(
                f"{QDRANT_URL}/collections/{MEMORIES_COLLECTION}/points/scroll",
                json=body,
                timeout=30,
            )
            response.raise_for_status()
            result = response.json().get("result", {})
        except Exception as e:
            print(f"Error fetching turns: {e}", file=sys.stderr)
            break
        # Client-side filter on the turn number.
        for point in result.get("points", []):
            if point.get("payload", {}).get("turn", 0) > last_turn:
                turns.append(point)
        offset = result.get("next_page_offset")
        if offset is None or len(turns) >= limit:
            break
    # Sort by turn number so the transcript reads in order.
    turns.sort(key=lambda p: p.get("payload", {}).get("turn", 0))
    return turns
def extract_gems_with_llm(conversation_text: str) -> List[Dict[str, str]]:
    """Send the conversation to the curator model and parse gems from the reply.

    Returns a list of {"text", "category"} dicts; [] on any failure.
    """
    prompt = get_curator_prompt(conversation_text)
    try:
        response = requests.post(
            f"{OLLAMA_URL}/v1/chat/completions",
            json={
                "model": CURATOR_MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.3,
                "max_tokens": 1000,
            },
            timeout=120,
        )
        response.raise_for_status()
        data = response.json()
        content = data.get("choices", [{}])[0].get("message", {}).get("content", "[]")
    except Exception as e:
        print(f"Error calling LLM: {e}", file=sys.stderr)
        return []
    # Pull the outermost JSON array out of the reply.
    start = content.find('[')
    end = content.rfind(']')
    # BUGFIX: also require end > start — the old `end != -1` check allowed a
    # "]...[" reply to produce a nonsense slice.
    if start != -1 and end > start:
        try:
            gems = json.loads(content[start:end + 1])
        except json.JSONDecodeError:
            return []
        if isinstance(gems, list):
            return gems
    return []
def store_gem(gem: Dict[str, str]) -> bool:
    """Embed a gem's text and upsert it into the gems collection.

    Returns True on success. BUGFIX: the point ID is now a SHA-256-derived
    63-bit integer of the gem text — the previous `hash()`-based ID varied
    per interpreter run (PYTHONHASHSEED), so the same gem got a fresh ID
    each run and deduplication-by-overwrite never happened.
    """
    import hashlib  # local import: keeps the fix self-contained

    try:
        # Get embedding for gem
        response = requests.post(
            f"{OLLAMA_URL}/api/embeddings",
            json={"model": "snowflake-arctic-embed2", "prompt": gem["text"]},
            timeout=30,
        )
        response.raise_for_status()
        vector = response.json().get("embedding", [])
        if not vector:
            return False
        # Deterministic ID so identical gem text upserts the same point.
        digest = hashlib.sha256(gem["text"].encode("utf-8")).digest()[:8]
        point_id = int.from_bytes(digest, byteorder="big") % (2**63)
        # Store to gems_tr
        response = requests.put(
            f"{QDRANT_URL}/collections/{GEMS_COLLECTION}/points",
            json={
                "points": [{
                    "id": point_id,
                    "vector": vector,
                    "payload": {
                        "text": gem["text"],
                        "category": gem.get("category", "other"),
                        "createdAt": datetime.now(timezone.utc).isoformat(),
                        "source": "turn_based_curator",
                    },
                }]
            },
            timeout=30,
        )
        response.raise_for_status()
        return True
    except Exception as e:
        print(f"Error storing gem: {e}", file=sys.stderr)
        return False
def get_current_turn_count() -> int:
    """Return the total number of points in the memories collection.

    BUGFIX: main() called this function but it was never defined anywhere
    in the file, so every invocation (including --status, which the cron
    wrapper greps) died with a NameError. The total point count serves as
    the monotonically increasing "current turn" counter, matching what the
    state file tracks (and what the sibling count-based curator does).
    """
    try:
        response = requests.get(
            f"{QDRANT_URL}/collections/{MEMORIES_COLLECTION}",
            timeout=10,
        )
        response.raise_for_status()
        return response.json()["result"]["points_count"]
    except Exception as e:
        print(f"Error getting turn count: {e}", file=sys.stderr)
        return 0


def main():
    """CLI entry point: curate when enough new turns have accumulated."""
    parser = argparse.ArgumentParser(description="Turn-based curator")
    parser.add_argument("--threshold", "-t", type=int, default=10,
                        help="Run curation every N turns (default: 10)")
    parser.add_argument("--execute", "-e", action="store_true",
                        help="Execute curation")
    parser.add_argument("--dry-run", "-n", action="store_true",
                        help="Preview what would be curated")
    parser.add_argument("--status", "-s", action="store_true",
                        help="Show current turn status")
    args = parser.parse_args()
    # Load state
    state = load_state()
    current_turn = get_current_turn_count()
    turns_since = current_turn - state["last_turn"]
    if args.status:
        print(f"Current turn: {current_turn}")
        print(f"Last curation: {state['last_turn']}")
        print(f"Turns since last curation: {turns_since}")
        print(f"Threshold: {args.threshold}")
        # The cron wrapper greps for "Ready to curate: YES" — keep this exact.
        print(f"Ready to curate: {'YES' if turns_since >= args.threshold else 'NO'}")
        return
    print(f"Turn-based Curator")
    print(f"Current turn: {current_turn}")
    print(f"Last curation: {state['last_turn']}")
    print(f"Turns since: {turns_since}")
    print(f"Threshold: {args.threshold}")
    print()
    if turns_since < args.threshold:
        print(f"Not enough turns. Need {args.threshold}, have {turns_since}")
        return
    # Get turns to process (small over-fetch margin to be safe)
    print(f"Fetching {turns_since} turns...")
    turns = get_turns_since(state["last_turn"], limit=turns_since + 10)
    if not turns:
        print("No new turns found")
        return
    # Build conversation text
    conversation_parts = []
    for turn in turns:
        role = turn.get("payload", {}).get("role", "unknown")
        content = turn.get("payload", {}).get("content", "")
        conversation_parts.append(f"{role.upper()}: {content}")
    conversation_text = "\n\n".join(conversation_parts)
    print(f"Processing {len(turns)} turns ({len(conversation_text)} chars)")
    print()
    if args.dry_run:
        print("=== CONVERSATION TEXT ===")
        print(conversation_text[:500] + "..." if len(conversation_text) > 500 else conversation_text)
        print()
        print("[DRY RUN] Would extract gems and store to gems_tr")
        return
    if not args.execute:
        print("Use --execute to run curation or --dry-run to preview")
        return
    # Extract gems
    print("Extracting gems with LLM...")
    gems = extract_gems_with_llm(conversation_text)
    if not gems:
        print("No gems extracted")
        return
    print(f"Extracted {len(gems)} gems:")
    for i, gem in enumerate(gems, 1):
        print(f"  {i}. [{gem.get('category', 'other')}] {gem['text'][:80]}...")
    print()
    # Store gems
    print("Storing gems...")
    success = 0
    for gem in gems:
        if store_gem(gem):
            success += 1
    # Update state
    state["last_turn"] = current_turn
    state["last_curation"] = datetime.now(timezone.utc).isoformat()
    save_state(state)
    print(f"Done! Stored {success}/{len(gems)} gems")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""
Migration: Add 'curated: false' to existing memories_tr entries.
Run once to update all existing memories for the new timer curator.
Uses POST /collections/{name}/points/payload with {"points": [ids], "payload": {...}}
"""
import requests
import time
import sys
# Qdrant instance and the collection to migrate.
QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION = "memories_tr"


def update_existing_memories():
    """Add curated=False to every memory that lacks the field.

    Scrolls the collection in batches, collects point IDs missing the
    "curated" key, and sets the payload via POST /points/payload.

    BUGFIX: pagination previously tested `if offset:` — Qdrant's
    next_page_offset can be a falsy-but-valid cursor (e.g. integer point
    id 0), which would silently truncate the migration. Compare against
    None instead.
    """
    print("🔧 Migrating existing memories...")
    offset = None
    updated = 0
    batch_size = 100
    max_iterations = 200  # hard cap: at most 20k points per run
    iterations = 0
    while iterations < max_iterations:
        iterations += 1
        scroll_data = {
            "limit": batch_size,
            "with_payload": True,
        }
        if offset is not None:
            scroll_data["offset"] = offset
        try:
            response = requests.post(
                f"{QDRANT_URL}/collections/{COLLECTION}/points/scroll",
                json=scroll_data,
                headers={"Content-Type": "application/json"},
                timeout=30,
            )
            response.raise_for_status()
            result = response.json()
            points = result.get("result", {}).get("points", [])
            if not points:
                break
            # Collect IDs that still need curated=False.
            ids_to_update = [
                point["id"]
                for point in points
                if "curated" not in point.get("payload", {})
            ]
            if ids_to_update:
                # POST /points/payload with {"points": [ids], "payload": {...}}
                update_response = requests.post(
                    f"{QDRANT_URL}/collections/{COLLECTION}/points/payload",
                    json={
                        "points": ids_to_update,
                        "payload": {"curated": False},
                    },
                    timeout=30,
                )
                update_response.raise_for_status()
                updated += len(ids_to_update)
                print(f"  Updated batch: {len(ids_to_update)} memories (total: {updated})")
                time.sleep(0.05)  # be gentle with Qdrant between write batches
            offset = result.get("result", {}).get("next_page_offset")
            if offset is None:
                break
        except Exception as e:
            print(f"Error: {e}", file=sys.stderr)
            import traceback
            traceback.print_exc()
            break
    print(f"✅ Migration complete: {updated} memories updated with curated=false")


if __name__ == "__main__":
    update_existing_memories()

View File

@@ -0,0 +1,14 @@
[Unit]
Description=TrueRecall Turn-Based Curator (every 10 turns)
# Order after the watcher that feeds memories_tr.
# NOTE(review): no matching turn-curator.timer unit is included; as written
# this service runs once at boot and exits. For a run-to-completion script,
# Type=oneshot plus a .timer unit would fit better than Type=simple with
# Restart=on-failure — confirm the intended activation model.
After=network.target mem-qdrant-watcher.service

[Service]
Type=simple
User=root
WorkingDirectory=/root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous
ExecStart=/usr/bin/python3 /root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous/curator_turn_based.py --threshold 10 --execute
Restart=on-failure
# Back off 60 seconds between restart attempts.
RestartSec=60

[Install]
WantedBy=multi-user.target