From a22e6f095a9fcdf0c8a43d72ac949bfc79573fe2 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 24 Feb 2026 21:42:48 -0600 Subject: [PATCH] chore: Remove unnecessary files and folders Removed: - session.md (user-specific session notes) - migrate_memories.py (one-time migration script) - test_curator.py (test file) - __pycache__/ (Python cache) - tr-compact/ (v1 deprecated) - tr-daily/ (v1 deprecated) - tr-worker/ (empty) - shared/ (empty) - tr-continuous/migrate_add_curated.py - tr-continuous/curator_by_count.py - tr-continuous/curator_turn_based.py - tr-continuous/curator_cron.sh - tr-continuous/turn-curator.service - tr-continuous/README.md (redundant) Remaining core files: - README.md, checklist.md, curator-prompt.md - install.py, push-all.sh, .gitignore - tr-continuous/curator_timer.py - tr-continuous/curator_config.json --- __pycache__/migrate_memories.cpython-312.pyc | Bin 7400 -> 0 bytes migrate_memories.py | 187 ------- session.md | 494 ------------------ test_curator.py | 64 --- tr-compact/hook.py | 105 ---- tr-continuous/README.md | 101 ---- tr-continuous/curator_by_count.py | 194 ------- tr-continuous/curator_cron.sh | 12 - tr-continuous/curator_turn_based.py | 291 ----------- tr-continuous/migrate_add_curated.py | 85 --- tr-continuous/turn-curator.service | 14 - .../curate_from_qdrant.cpython-312.pyc | Bin 15132 -> 0 bytes tr-daily/curate_from_qdrant.py | 358 ------------- 13 files changed, 1905 deletions(-) delete mode 100644 __pycache__/migrate_memories.cpython-312.pyc delete mode 100644 migrate_memories.py delete mode 100644 session.md delete mode 100644 test_curator.py delete mode 100644 tr-compact/hook.py delete mode 100644 tr-continuous/README.md delete mode 100644 tr-continuous/curator_by_count.py delete mode 100644 tr-continuous/curator_cron.sh delete mode 100644 tr-continuous/curator_turn_based.py delete mode 100755 tr-continuous/migrate_add_curated.py delete mode 100644 tr-continuous/turn-curator.service delete mode 100644 tr-daily/__pycache__/curate_from_qdrant.cpython-312.pyc delete mode 100644 tr-daily/curate_from_qdrant.py diff --git a/__pycache__/migrate_memories.cpython-312.pyc b/__pycache__/migrate_memories.cpython-312.pyc deleted file mode 100644 index 73369e153db11e0e4ab32f4aa1d5297d8c3792b4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7400 zcmbtZYiwIbcAm@oAbI(I>19j0vSnLTBGK|IiTsKyTauGlP9!DS$nw%O@0CoM5AD5| zQN)l2e|QTU>(=r*faS)9xhT>ouz%>Mi!O@YI4J`3N2yS$xHm4UxB*)KDq{z5>!3i- zT=G(+LT?uB2%MQabLN~gGiT0sX7rC13yUEA-T&MSb$Jl_H~gU`rc`_Ohbj4h<;2TF^m}^#xWzb(PJ#njG1`-n3*>S7TzdW zCMcdgPmNi5llrX}42>WMTFuaEQ(G-UsTJt$6AW*K9tUp|4Df9LN;`ZzdB=G)<`Uez z^K1t~qCtlQ*X@$(1e(>&>b$PRKe5L`lVU&?xQGymi6KGaCd62TI~$6G{N*M&R_^x8 zBHP6c3jw|+Wyc%57>LSVpy>+>fvDt@WHB@?anV>v61a|tAP4w>9C)6S&xE39L(xf2 z4opgzengImAii1_SHbXAK>$XeSIOHW114t3%P(<*O3iVn@ zk`?N3C@3rBq3CSFb4He@dwss1?cMlq*Y@5$J9q9>ELF2sOxpZm&I%<9@5u@y3UXYG z28q%-;(~@B3BSi#$gZIzl89L%iBez^JQ+qQT~arN``0C&;)x`2yVR2+oR|9tVz4h3eOC~rfE#*KHxqp}#6WPu0MDen2Ww|89XJay{SloX3XZT$Tc~ zi{2AIA3u4rbNpmt`}m3RnfG_5xN-jdU8yd}ca(D6vLn^2=G=)xU1KMK3`Q{H(f4;g zpGxRh7Pem}3JNL3Pb;))i(XPRg4&8chz(TWMGJgMcpo^9d)n_4V=>v+9h(-S!Ej*4 zHxm=jO4EU$;Om|iV^cyfxR2c zi9?n~-AC4zN*G zMvQ%$YQ%2Huc1k7zC=pL>ry07rl=&9B+tTT&k^FTB*jw+ShEzx(>w!z`Ws}5PU=MX zC28KU4_L`qn3|@riL6Vt%ud$4D*6!d#w5psj)}^FTlMht? 
zH~B7&19pDTl3qinl9h?NIpWeTtl?(V1MYN2N1%5Q0e_dA?~(7I8Dewg{xz+e0DMku z{Bc`55*t&j+^Y5P?j-vmx~bE2#WKL=*>fzGT&&FyGpH!h>q&3}g3Q5@=cZ$THYNBA zaBC-ODD)q2dQ_+Q5F6%09f|g!hj1t6XPG^qxlJ;=rJzF;wqd;@i3G{DT+M`AO{Lg3jU%#3H=dKjETLR z*RC*eF&qw^R`jCqi?{%Qsvj;@*f$Oj9~v60kjctO#_Q0!1) zAzx9erJut!Cd3fIpz<=;q{fnEvTX8i3hiALnhDMxn*`sp`R%7e$s~9 zvC=<$-+=6Omy;KhE4D35wk?m44rXa~%ni&Rn%_CswQT0THak{L9?&gya9(cG^mKkv5o>&So5 z(NI@yBUp)EdSI+#WD~SPphw_94-irZzJioXNCe)QkTI2-AlxdoCv{q=cNm@g8;qVw z0#KDo>JmWNgi9TQ)NV4Ym>{^2&hS^*hvF+mN9Y4x<;$j zl8mO=6gKvCNkbxx?tqH$0dxP~=QFRAC*J0=Z%06Q_V;q$yM@n-d|Voj12 zYgk5kHR>RF3;xr10VXU^C`s193QHL)*7U7gClEfT8h+dcSkaI&%GklEiMJ+=Kf(%2 z%6QIr>EC&^tUwA|qDj5WXgC2knl-1cqJxM*T{SQf164v_a`R#{Zf}81Azy!UROIky zJI=O1_V5eHh7umtEpigT2EIO>4#>eX;GnGv4L7ez2zA3kRP4lqu(J}M$5|K7x^d>i z*>;@uK&CK~9FXIZ_yTTu5oa$!=4Dl9TXc`CD#0&>5&|ZNOB$~bA%#|(6;sg{t1ebN zfEgUb*~^eg*eo@d%5g=fYF0UD9{dE()V^)57YE#G!1pYP8_P#j`X*~lH=>@RM~TZ zgi_*(wFom+<u%jN57El;TVs9GLT z>*JXl)JFvP+S;?!YuY}e-%#$Sc;gN5FAPBLjH2tfkDJeiElt54&z=+>GsI`@!c989g3Gp-XD+^T?nG2VuT2{);IB( zw{Z44WL{d_jqCVkDPq7;$Y^XvF@~hrL`;kXWFQ#AeVAA=6xEU@6$Z44Kxr#of3vHH~Xx^CFk+W`14`5?%o`3t+iR&j8TJAb? z&HHk;{prE`mZtd^ZtcCkHU~S~zFd9JBNOzej~ATC=2nGKzl!CZ_1C;tyz_%O=a#%{6GT;Jrhq7#5i#ln z;a5%usHt#$3G!>G5}>thB`RAhE&@MAtC?5Tcv3D03mArPk`+?QhP+0j^`%H#S7=x^VtCZ6Y!{{;*)J>wG?j!^ZaM;TM##-=lMR&?Q1&?QXCc1tM|1W+*FNruC36z(bc-w9_{+0t5-1# z2(xO~n;L>uT9|unhE*2kQRURt;-Ri=g?6)GUj+P8aH00r4<+{P|9_-74q}es_v%@$ z_BfcxRb(D> zgV>KV3?qtT^w99YkAZm{zQ1m{K2I#G{yjMOGK0M7AEes7N3b z6$K#tOZZ6(;Ih)_K4rYnasD_IS$6L-wQueFJO(eC&6-=5TeoLhdKO(-cK0&1=e~Q> z^5$JR_wMvDOnelg(5_rvN7mzAsL!%J%hZloV0=ttianhLqW#YwgLl>{tZq-eqd9N_~D{ZPHn`WtnQl->)p! zcVz3l3$8mOi_N*NgW2a_{+&L{4lYwif4Vzs-jvys=~w_UJC>=PCH|{=%LlHxlR3xc zFZG+%74m*%==>Iz%HFfMb+O^DJCcy#`~$= zIY3Z~!SBZ#>Gyj*ir(+%V?n=Py~8Nh;!9L_=^;wJ>?r0^qxybS42T$cb{cF|#MeI& z1BHm$7cWD8Tss9B{A)lKi%}FdQ-y?pS;Xe3+Nx?sSe0lXBKZBEAT`cVBc!K>eUTU+ z4+{sx7?5LNls<+G;&p=fj$#Pvu^kcSuaW0#GimRs1k u+){X%@DR+r8*^eZ?U)nu2sr(WfPsiIUr2mOHvWM!{Ib75BC1K9$o~TLNwe($ diff --git a/migrate_memories.py b/migrate_memories.py deleted file mode 100644 index b846520..0000000 --- a/migrate_memories.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python3 -""" -Migrate memories from kimi_memories to memories_tr -- Reads from kimi_memories (Qdrant) -- Cleans/strips noise (metadata, thinking tags) -- Stores to memories_tr (Qdrant) -- Keeps original kimi_memories intact -""" - -import json -import urllib.request -import urllib.error -from datetime import datetime -from typing import List, Dict, Any - -QDRANT_URL = "http://10.0.0.40:6333" -SOURCE_COLLECTION = "kimi_memories" -TARGET_COLLECTION = "memories_tr" - -def clean_content(text: str) -> str: - """Clean noise from content""" - if not text: - return "" - - cleaned = text - - # Remove metadata JSON blocks - import re - cleaned = re.sub(r'Conversation info \(untrusted metadata\):\s*```json\s*\{[\s\S]*?\}\s*```', '', cleaned) - - # Remove thinking tags - cleaned = re.sub(r'\[thinking:[^\]]*\]', '', cleaned) - - # Remove timestamp lines - cleaned = re.sub(r'\[\w{3} \d{4}-\d{2}-\d{2} \d{2}:\d{2} [A-Z]{3}\]', '', cleaned) - - # Clean up whitespace - cleaned = re.sub(r'\n{3,}', '\n\n', cleaned) - cleaned = cleaned.strip() - - return cleaned - -def get_all_points(collection: str) -> List[Dict]: - """Get all points from a collection""" - all_points = [] - offset = None - max_iterations = 1000 - iterations = 0 - - while iterations < max_iterations: - iterations += 1 - scroll_data = { - "limit": 100, - "with_payload": True, - "with_vector": True - } - - if offset: - scroll_data["offset"] = 
offset - - req = urllib.request.Request( - f"{QDRANT_URL}/collections/{collection}/points/scroll", - data=json.dumps(scroll_data).encode(), - headers={"Content-Type": "application/json"}, - method="POST" - ) - - try: - with urllib.request.urlopen(req, timeout=60) as response: - result = json.loads(response.read().decode()) - points = result.get("result", {}).get("points", []) - - if not points: - break - - all_points.extend(points) - - offset = result.get("result", {}).get("next_page_offset") - if not offset: - break - except urllib.error.HTTPError as e: - print(f"Error: {e}") - break - - return all_points - -def store_points(collection: str, points: List[Dict]) -> int: - """Store points to collection""" - if not points: - return 0 - - # Batch upload - batch_size = 100 - stored = 0 - - for i in range(0, len(points), batch_size): - batch = points[i:i+batch_size] - - points_data = { - "points": batch - } - - req = urllib.request.Request( - f"{QDRANT_URL}/collections/{collection}/points", - data=json.dumps(points_data).encode(), - headers={"Content-Type": "application/json"}, - method="PUT" - ) - - try: - with urllib.request.urlopen(req, timeout=60) as response: - if response.status == 200: - stored += len(batch) - except urllib.error.HTTPError as e: - print(f"Error storing batch: {e}") - - return stored - -def migrate_point(point: Dict) -> Dict: - """Clean a single point""" - payload = point.get("payload", {}) - - # Clean user and AI messages - user_msg = clean_content(payload.get("user_message", "")) - ai_msg = clean_content(payload.get("ai_response", "")) - - # Keep other fields - cleaned_payload = { - **payload, - "user_message": user_msg, - "ai_response": ai_msg, - "migrated_from": "kimi_memories", - "migrated_at": datetime.now().isoformat() - } - - return { - "id": point.get("id"), - "vector": point.get("vector"), - "payload": cleaned_payload - } - -def main(): - print("=" * 60) - print("Memory Migration: kimi_memories β†’ memories_tr") - print("=" * 60) - print() - - # Check source - print(f"πŸ“₯ Reading from {SOURCE_COLLECTION}...") - source_points = get_all_points(SOURCE_COLLECTION) - print(f" Found {len(source_points)} points") - - if not source_points: - print("❌ No points to migrate") - return - - # Clean points - print(f"\n🧹 Cleaning {len(source_points)} points...") - cleaned_points = [migrate_point(p) for p in source_points] - print(f" βœ“ Cleaned") - - # Store to target - print(f"\nπŸ’Ύ Storing to {TARGET_COLLECTION}...") - stored = store_points(TARGET_COLLECTION, cleaned_points) - print(f" βœ“ Stored {stored} points") - - # Verify - print(f"\nπŸ” Verifying...") - target_points = get_all_points(TARGET_COLLECTION) - print(f" Target now has {len(target_points)} points") - - # Summary - print() - print("=" * 60) - print("Migration Summary:") - print(f" Source ({SOURCE_COLLECTION}): {len(source_points)} points") - print(f" Target ({TARGET_COLLECTION}): {len(target_points)} points") - print(f" Cleaned & migrated: {stored} points") - print("=" * 60) - - if stored == len(source_points): - print("\nβœ… Migration complete!") - else: - print(f"\n⚠️ Warning: Only migrated {stored}/{len(source_points)} points") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/session.md b/session.md deleted file mode 100644 index 3bbedef..0000000 --- a/session.md +++ /dev/null @@ -1,494 +0,0 @@ -# TrueRecall v2 - Session Notes - -**Last Updated:** 2026-02-24 19:02 CST -**Status:** βœ… Active & Verified -**Version:** v2.2 (Timer-based curation deployed) - ---- - -## Session 
End (18:09 CST)
-
-**Reason:** User starting new session
-
-**Current State:**
-- Real-time watcher: ✅ Active (capturing live)
-- Timer curator: ✅ Deployed (every 30 min via cron)
-- Daily curator: ❌ Removed (replaced by timer)
-- Total memories: 12,378 (all tagged with `curated: false`)
-- Gems: 5 (from Feb 18 test)
-
-**Next session start:** Read this file, then check:
-```bash
-# Quick status
-python3 ~/.openclaw/workspace/.projects/true-recall-v2/tr-continuous/curator_by_count.py --status
-sudo systemctl status mem-qdrant-watcher
-curl -s http://:6333/collections/memories_tr | jq '.result.points_count'
-```
-
----
-
-## Executive Summary
-
-TrueRecall v2 is a complete memory system with real-time capture, timer-based curation, and context injection. All components are operational.
-
----
-
-## Current State (Verified 18:09 CST)
-
-### Qdrant Collections
-
-| Collection | Points | Purpose | Status |
-|------------|--------|---------|--------|
-| `memories_tr` | **12,378** | Full text (live capture) | ✅ Active |
-| `gems_tr` | **5** | Curated gems (injection) | ✅ Active |
-| `true_recall` | existing | Legacy archive | 📦 Preserved |
-| `kimi_memories` | 12,223 | Original backup | 📦 Preserved |
-
-**Note:** All memories tagged with `curated: false` for timer curator.
-
-### Services
-
-| Service | Status | Uptime |
-|---------|--------|--------|
-| `mem-qdrant-watcher` | ✅ Active | 30+ min |
-| OpenClaw Gateway | ✅ Running | 2026.2.23 |
-| memory-qdrant plugin | ✅ Loaded | recall: gems_tr, capture: memories_tr |
-
----
-
-## Architecture
-
-### v2.2: Timer-Based Curation (DEPLOYED)
-
-**Data Flow:**
-```
-┌─────────────────┐     ┌───────────────────┐     ┌─────────────┐
-│ OpenClaw Chat   │────▶│ Real-Time Watcher │────▶│ memories_tr │
-│ (Session JSONL) │     │ (Python daemon)   │     │ (Qdrant)    │
-└─────────────────┘     └───────────────────┘     └──────┬──────┘
-                                                         │
-                                                         │ Every 30 min
-                                                         ▼
-                                                ┌──────────────────┐
-                                                │ Timer Curator    │
-                                                │ (cron/qwen3)     │
-                                                └────────┬─────────┘
-                                                         │
-                                                         ▼
-                                                ┌──────────────────┐
-                                                │ gems_tr          │
-                                                │ (Qdrant)         │
-                                                └────────┬─────────┘
-                                                         │
-                                                Per turn │
-                                                         ▼
-                                                ┌──────────────────┐
-                                                │ memory-qdrant    │
-                                                │ plugin           │
-                                                └──────────────────┘
-```
-
-**Key Changes:**
-- ✅ Replaced daily 2:45 AM batch with 30-minute timer
-- ✅ All memories tagged `curated: false` on write
-- ✅ Migration completed for 12,378 existing memories
-- ✅ No Redis dependency (direct Qdrant only)
-
----
-
-## Components
-
-### Curation Mode: Timer-Based (DEPLOYED v2.2)
-
-| Setting | Value | Adjustable |
-|---------|-------|------------|
-| **Trigger** | Cron timer | ✅ |
-| **Interval** | 30 minutes | ✅ Config file |
-| **Batch size** | 100 memories max | ✅ Config file |
-| **Minimum** | None (0 is OK) | — |
-
-**Config:** `/tr-continuous/curator_config.json`
-```json
-{
-  "timer_minutes": 30,
-  "max_batch_size": 100,
-  "user_id": "rob",
-  "source_collection": "memories_tr",
-  "target_collection": "gems_tr"
-}
-```
-
-**Cron:**
-```
-*/30 * * * * cd .../tr-continuous && python3 
curator_timer.py -``` - -**Old modes deprecated:** -- ❌ Turn-based (every N turns) -- ❌ Hybrid (timer + turn) -- ❌ Daily batch (2:45 AM) - -### 1. Real-Time Watcher (Primary Capture) - -**Location:** `~/.openclaw/workspace/skills/qdrant-memory/scripts/realtime_qdrant_watcher.py` - -**Function:** -- Watches `~/.openclaw/agents/main/sessions/*.jsonl` -- Parses every conversation turn in real-time -- Embeds with `snowflake-arctic-embed2` (Ollama @ ) -- Stores directly to `memories_tr` (no Redis) -- **Cleans content:** Removes markdown, tables, metadata, thinking tags - -**Service:** `mem-qdrant-watcher.service` -- **Status:** Active since 16:46:53 CST -- **Systemd:** Enabled, auto-restart - -**Log:** `journalctl -u mem-qdrant-watcher -f` - ---- - -### 2. Content Cleaner (Existing Data) - -**Location:** `~/.openclaw/workspace/skills/qdrant-memory/scripts/clean_memories_tr.py` - -**Function:** -- Batch-cleans existing `memories_tr` points -- Removes: `**bold**`, `|tables|`, `` `code` ``, `---` rules, `# headers` -- Flattens nested content dicts -- Rate-limited to prevent Qdrant overload - -**Usage:** -```bash -# Dry run (preview) -python3 clean_memories_tr.py --dry-run - -# Clean all -python3 clean_memories_tr.py --execute - -# Clean limited (test) -python3 clean_memories_tr.py --execute --limit 100 -``` - ---- - -### 3. Timer Curator (v2.2 - DEPLOYED) - -**Replaces:** Daily curator (2:45 AM batch) and turn-based curator - -**Location:** `~/.openclaw/workspace/.projects/true-recall-v2/tr-continuous/curator_timer.py` - -**Schedule:** Every 30 minutes (cron) - -**Flow:** -1. Query uncurated memories (`curated: false`) -2. Send batch to qwen3 (max 100) -3. Extract gems using curator prompt -4. Store gems to `gems_tr` -5. Mark processed memories as `curated: true` - -**Files:** -| File | Purpose | -|------|---------| -| `curator_timer.py` | Main curator script | -| `curator_config.json` | Adjustable settings | -| `migrate_add_curated.py` | One-time migration (completed) | - -**Usage:** -```bash -# Dry run (preview) -python3 curator_timer.py --dry-run - -# Manual run -python3 curator_timer.py --config curator_config.json -``` - -**Status:** βœ… Deployed, first run will process ~12,378 existing memories - -### 5. Silent Compacting (NEW - Concept) - -**Idea:** Automatically remove old context from prompt when token limit approached. - -**Behavior:** -- Trigger: Context window > 80% full -- Action: Remove oldest messages (silently) -- Preserve: Gems always kept, recent N turns kept -- Result: Seamless conversation without "compacting" notification - -**Config:** -```json -{ - "compacting": { - "enabled": true, - "triggerAtPercent": 80, - "keepRecentTurns": 20, - "preserveGems": true, - "silent": true - } -} -``` - -**Status:** ⏳ Concept only - requires OpenClaw core changes - -### 6. 
memory-qdrant Plugin - -**Location:** `~/.openclaw/extensions/memory-qdrant/` - -**Config:** -```json -{ - "collectionName": "gems_tr", - "captureCollection": "memories_tr", - "autoRecall": true, - "autoCapture": true -} -``` - -**Function:** -- **Recall:** Searches `gems_tr`, injects as context (hidden) -- **Capture:** Session-level capture to `memories_tr` (backup) - -**Status:** Loaded, dual collection support working - ---- - -## Files & Locations - -### Core Project Files - -``` -~/.openclaw/workspace/.projects/true-recall-v2/ -β”œβ”€β”€ README.md # Architecture docs -β”œβ”€β”€ session.md # This file -β”œβ”€β”€ curator-prompt.md # Gem extraction prompt -β”œβ”€β”€ tr-daily/ # Daily batch curation -β”‚ └── curate_from_qdrant.py # Daily curator (2:45 AM) -β”œβ”€β”€ tr-continuous/ # Real-time curation (NEW) -β”‚ β”œβ”€β”€ curator_by_count.py # Turn-based curator -β”‚ β”œβ”€β”€ curator_turn_based.py # Alternative approach -β”‚ β”œβ”€β”€ curator_cron.sh # Cron wrapper -β”‚ β”œβ”€β”€ turn-curator.service # Systemd service -β”‚ └── README.md # Documentation -└── shared/ - └── (shared resources) -``` - -### New Files (2026-02-24 19:00) - -| File | Purpose | -|------|---------| -| `tr-continuous/curator_timer.py` | Timer-based curator (deployed) | -| `tr-continuous/curator_config.json` | Curator settings | -| `tr-continuous/migrate_add_curated.py` | Migration script (completed) | - -### Legacy Files (Pre-v2.2) - -| File | Status | Note | -|------|--------|------| -| `tr-daily/curate_from_qdrant.py` | πŸ“¦ Archived | Replaced by timer | -| `tr-continuous/curator_by_count.py` | πŸ“¦ Archived | Replaced by timer | -| `tr-continuous/curator_turn_based.py` | πŸ“¦ Archived | Replaced by timer | - -### System Locations - -| File | Purpose | -|------|---------| -| `~/.openclaw/extensions/memory-qdrant/` | Plugin code | -| `~/.openclaw/openclaw.json` | Plugin configuration | -| `/etc/systemd/system/mem-qdrant-watcher.service` | Systemd service | - ---- - -## Changes Made Today (2026-02-24 19:00) - -### 1. Timer Curator Deployed (v2.2) - -- Created `curator_timer.py` β€” simplified timer-based curation -- Created `curator_config.json` β€” adjustable settings -- Removed daily 2:45 AM cron job -- Added `*/30 * * * *` cron timer -- **Status:** βœ… Deployed, logs to `/var/log/true-recall-timer.log` - -### 2. Migration Completed - -- Created `migrate_add_curated.py` -- Tagged 12,378 existing memories with `curated: false` -- Updated watcher to add `curated: false` to new memories -- **Status:** βœ… Complete - -### 3. 
Simplified Architecture - -- ❌ Removed turn-based curator complexity -- ❌ Removed daily batch processing -- βœ… Single timer trigger every 30 minutes -- βœ… No minimum threshold (processes 0-N memories) - ---- - -## Configuration - -### memory-qdrant Plugin - -**File:** `~/.openclaw/openclaw.json` - -```json -{ - "memory-qdrant": { - "config": { - "autoCapture": true, - "autoRecall": true, - "collectionName": "gems_tr", - "captureCollection": "memories_tr", - "embeddingModel": "snowflake-arctic-embed2", - "maxRecallResults": 2, - "minRecallScore": 0.7, - "ollamaUrl": "http://:11434", - "qdrantUrl": "http://:6333" - }, - "enabled": true - } -} -``` - -### Gateway (OpenClaw Update Fix) - -```json -{ - "gateway": { - "controlUi": { - "allowedOrigins": ["*"], - "allowInsecureAuth": false, - "dangerouslyDisableDeviceAuth": true - } - } -} -``` - ---- - -## Validation Commands - -### Check Collections - -```bash -# Points count -curl -s http://:6333/collections/memories_tr | jq '.result.points_count' -curl -s http://:6333/collections/gems_tr | jq '.result.points_count' - -# Recent points -curl -s -X POST http://:6333/collections/memories_tr/points/scroll \ - -H "Content-Type: application/json" \ - -d '{"limit": 5, "with_payload": true}' | jq '.result.points[].payload.content' -``` - -### Check Services - -```bash -# Watcher status -sudo systemctl status mem-qdrant-watcher - -# Watcher logs -sudo journalctl -u mem-qdrant-watcher -n 20 - -# OpenClaw status -openclaw status -``` - ---- - -## Troubleshooting - -### Issue: Watcher Not Capturing - -**Check:** -1. Service running? `systemctl status mem-qdrant-watcher` -2. Logs: `journalctl -u mem-qdrant-watcher -f` -3. Qdrant accessible? `curl http://:6333/` -4. Ollama accessible? `curl http://:11434/api/tags` - -### Issue: Cleaner Fails - -**Common causes:** -- Qdrant connection timeout (add `time.sleep(0.1)` between batches) -- Nested content dicts (handled in updated script) -- Type errors (non-string content β€” handled) - -### Issue: Plugin Not Loading - -**Check:** -1. `openclaw.json` syntax valid? `openclaw config validate` -2. Plugin compiled? `cd ~/.openclaw/extensions/memory-qdrant && npx tsc` -3. 
Gateway logs: `tail /tmp/openclaw/openclaw-$(date +%Y-%m-%d).log` - ---- - -## Cron Schedule (Updated v2.2) - -| Time | Job | Script | Status | -|------|-----|--------|--------| -| Every 30 min | Timer curator | `tr-continuous/curator_timer.py` | βœ… Active | -| Per turn | Capture | `mem-qdrant-watcher` | βœ… Daemon | -| Per turn | Injection | `memory-qdrant` plugin | βœ… Active | - -**Removed:** -- ❌ 2:45 AM daily curator -- ❌ Every-minute turn curator check - ---- - -## Next Steps - -### Immediate -- ⏳ Monitor first timer run (logs: `/var/log/true-recall-timer.log`) -- ⏳ Validate gem extraction quality from timer curator -- ⏳ Archive old curator scripts if timer works - -### Completed βœ… -- βœ… **Compactor config** β€” Minimal overhead: `mode: default`, `reserveTokensFloor: 0`, `memoryFlush: false` - -### Future -- ⏳ Curator tuning based on timer results -- ⏳ Silent compacting (requires OpenClaw core changes) - -### Planned Features (Backlog) -- ⏳ **Interactive install script** β€” Prompts for embedding model, timer interval, batch size, endpoints -- ⏳ **Single embedding model option** β€” Use one model for both collections -- ⏳ **Configurable thresholds** β€” Per-user customization via prompts - -**Compactor Settings (Applied):** -```json5 -{ - agents: { - defaults: { - compaction: { - mode: "default", - reserveTokensFloor: 0, - memoryFlush: { enabled: false } - } - } - } -} -``` - -**Note:** Only `mode`, `reserveTokensFloor`, and `memoryFlush` are valid under `agents.defaults.compaction`. Other settings are Pi runtime parameters. - -**Install script prompts:** -1. Embedding model (snowflake vs mxbai) -2. Timer interval (5 min / 30 min / hourly) -3. Batch size (50 / 100 / 500) -4. Qdrant/Ollama URLs -5. User ID - ---- - -## Session Recovery - -If starting fresh: -1. Read `README.md` for architecture overview -2. Check service status: `sudo systemctl status mem-qdrant-watcher` -3. Check timer curator: `tail /var/log/true-recall-timer.log` -4. 
Verify collections: `curl http://:6333/collections` - ---- - -*Last Verified: 2026-02-24 19:29 CST* -*Version: v2.2 (30b curator, install script planned)* diff --git a/test_curator.py b/test_curator.py deleted file mode 100644 index 1980684..0000000 --- a/test_curator.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python3 -"""Quick test of curator with simple input""" -import json -import requests - -# Load prompt from v1 -with open('/root/.openclaw/workspace/.projects/true-recall-v1/curator-prompt.md') as f: - prompt = f.read() - -# Test with a simple conversation -test_turns = [ - { - 'turn': 1, - 'user_id': 'rob', - 'user': 'I want to switch from Redis to Qdrant for memory storage', - 'ai': 'Got it - Qdrant is a good choice for vector storage.', - 'conversation_id': 'test123', - 'timestamp': '2026-02-23T10:00:00', - 'date': '2026-02-23' - }, - { - 'turn': 2, - 'user_id': 'rob', - 'user': 'Yes, and I want the curator to read from Qdrant directly', - 'ai': 'Makes sense - we can modify the curator to query Qdrant instead of Redis.', - 'conversation_id': 'test123', - 'timestamp': '2026-02-23T10:01:00', - 'date': '2026-02-23' - } -] - -conversation_json = json.dumps(test_turns, indent=2) - -prompt_text = f"""## Input Conversation - -```json -{conversation_json} -``` - -## Output -""" - -response = requests.post( - 'http://10.0.0.10:11434/api/generate', - json={ - 'model': 'qwen3:4b-instruct', - 'system': prompt, - 'prompt': prompt_text, - 'stream': False, - 'options': {'temperature': 0.1, 'num_predict': 2000} - }, - timeout=120 -) - -result = response.json() -output = result.get('response', '').strip() -print('=== RAW OUTPUT ===') -print(output[:2000]) -print() -print('=== PARSED ===') -# Try to extract JSON -if '```json' in output: - parsed = output.split('```json')[1].split('```')[0].strip() - print(parsed) diff --git a/tr-compact/hook.py b/tr-compact/hook.py deleted file mode 100644 index c7c4a27..0000000 --- a/tr-compact/hook.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 -""" -TrueRecall v2 - Compaction Hook -Fast Redis queue push for compaction events - -Called by OpenClaw session_before_compact hook -""" - -import json -import sys -import redis -from datetime import datetime -from typing import List, Dict, Any - -# Redis config -REDIS_HOST = "10.0.0.36" -REDIS_PORT = 6379 -REDIS_DB = 0 -QUEUE_KEY = "tr:compact_queue" -TAG_PREFIX = "tr:processed" - -def get_redis_client(): - return redis.Redis( - host=REDIS_HOST, - port=REDIS_PORT, - db=REDIS_DB, - decode_responses=True - ) - -def tag_turns(messages: List[Dict], user_id: str = "rob"): - """Tag turns so v1 daily curator skips them""" - r = get_redis_client() - pipe = r.pipeline() - - for msg in messages: - conv_id = msg.get("conversation_id", "unknown") - turn = msg.get("turn", 0) - tag_key = f"{TAG_PREFIX}:{conv_id}:{turn}" - pipe.setex(tag_key, 86400, "1") # 24h TTL - - pipe.execute() - -def queue_messages(messages: List[Dict], user_id: str = "rob"): - """Push messages to Redis queue for background processing""" - r = get_redis_client() - - queue_item = { - "user_id": user_id, - "timestamp": datetime.now().isoformat(), - "message_count": len(messages), - "messages": messages - } - - # LPUSH to queue (newest first) - r.lpush(QUEUE_KEY, json.dumps(queue_item)) - - return len(messages) - -def process_compaction_event(event_data: Dict): - """ - Process session_before_compact event from OpenClaw - - Expected event_data: - { - "session_id": "uuid", - "user_id": "rob", - "messages_being_compacted": [ - {"role": "user", 
"content": "...", "turn": 1, "conversation_id": "..."}, - ... - ], - "compaction_reason": "context_limit" - } - """ - user_id = event_data.get("user_id", "rob") - messages = event_data.get("messages_being_compacted", []) - - if not messages: - return {"status": "ok", "queued": 0, "reason": "no_messages"} - - # Tag turns for v1 coordination - tag_turns(messages, user_id) - - # Queue for background processing - count = queue_messages(messages, user_id) - - return { - "status": "ok", - "queued": count, - "user_id": user_id, - "queue_key": QUEUE_KEY - } - -def main(): - """CLI entry point - reads JSON from stdin""" - try: - event_data = json.load(sys.stdin) - result = process_compaction_event(event_data) - print(json.dumps(result)) - sys.exit(0) - except Exception as e: - print(json.dumps({"status": "error", "error": str(e)})) - sys.exit(1) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/tr-continuous/README.md b/tr-continuous/README.md deleted file mode 100644 index d8afa5a..0000000 --- a/tr-continuous/README.md +++ /dev/null @@ -1,101 +0,0 @@ -# Turn-Based Curator - -Extract gems every N turns instead of waiting for daily curation. - -## Files - -| File | Purpose | -|------|---------| -| `curator_turn_based.py` | Main script - checks turn count, extracts gems | -| `curator_cron.sh` | Cron wrapper to run every minute | -| `turn-curator.service` | Alternative systemd service (runs on-demand) | - -## Usage - -### Manual Run - -```bash -# Check current status -python3 curator_turn_based.py --status - -# Preview what would be curated -python3 curator_turn_based.py --threshold 10 --dry-run - -# Execute curation -python3 curator_turn_based.py --threshold 10 --execute -``` - -### Automatic (Cron) - -Add to crontab: -```bash -* * * * * /root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous/curator_cron.sh -``` - -Or use systemd timer: -```bash -sudo cp turn-curator.service /etc/systemd/system/ -sudo systemctl enable turn-curator.timer # If you create a timer -``` - -### Automatic (Integrated) - -Alternative: Modify `realtime_qdrant_watcher.py` to trigger curation every 10 turns. - -## How It Works - -1. **Tracks turn count** - Stores last curation turn in `/tmp/curator_turn_state.json` -2. **Monitors delta** - Compares current turn count vs last curation -3. **Triggers at threshold** - When 10+ new turns exist, runs curation -4. **Extracts gems** - Sends conversation to qwen3, gets gems -5. **Stores results** - Saves gems to `gems_tr` collection - -## State File - -`/tmp/curator_turn_state.json`: -```json -{ - "last_turn": 150, - "last_curation": "2026-02-24T17:00:00Z" -} -``` - -## Comparison with Daily Curator - -| Feature | Daily Curator | Turn-Based Curator | -|---------|--------------|-------------------| -| Schedule | 2:45 AM daily | Every 10 turns (dynamic) | -| Time window | 24 hours | Variable (depends on chat frequency) | -| Trigger | Cron | Turn threshold | -| Use case | Nightly batch | Real-time-ish extraction | -| Overlap | Low | Possible with daily curator | - -## Recommendation - -Use **BOTH**: -- **Turn-based**: Every 10 turns for active conversations -- **Daily**: 2:45 AM as backup/catch-all - -They'll deduplicate automatically (same embeddings β†’ skipped). 
- -## Testing - -```bash -# Simulate 10 turns -for i in {1..10}; do - echo "Test message $i" > /dev/null -done - -# Check status -python3 curator_turn_based.py --status - -# Run manually -python3 curator_turn_based.py --threshold 10 --execute -``` - -## Status - -- βœ… Script created: `curator_turn_based.py` -- βœ… Cron wrapper: `curator_cron.sh` -- ⏳ Deployment: Optional (manual or cron) -- ⏳ Testing: Pending diff --git a/tr-continuous/curator_by_count.py b/tr-continuous/curator_by_count.py deleted file mode 100644 index eeb3cee..0000000 --- a/tr-continuous/curator_by_count.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python3 -""" -Turn-Based Curator: Extract gems every N new memories (turns). - -Usage: - python3 curator_by_count.py --threshold 10 --dry-run - python3 curator_by_count.py --threshold 10 --execute - python3 curator_by_count.py --status -""" - -import argparse -import json -import requests -import sys -from datetime import datetime, timezone, timedelta -from pathlib import Path - -QDRANT_URL = "http://10.0.0.40:6333" -MEMORIES = "memories_tr" -GEMS = "gems_tr" -OLLAMA = "http://10.0.0.10:11434" -MODEL = "ollama-remote/qwen3:30b-a3b-instruct-2507-q8_0" -STATE_FILE = Path("/tmp/curator_count_state.json") - -def load_state(): - if STATE_FILE.exists(): - with open(STATE_FILE) as f: - return json.load(f) - return {"last_count": 0, "last_time": None} - -def save_state(state): - with open(STATE_FILE, 'w') as f: - json.dump(state, f) - -def get_total_count(): - try: - r = requests.get(f"{QDRANT_URL}/collections/{MEMORIES}", timeout=10) - return r.json()["result"]["points_count"] - except: - return 0 - -def get_recent_memories(hours=1): - """Get memories from last N hours.""" - since = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat() - try: - r = requests.post( - f"{QDRANT_URL}/collections/{MEMORIES}/points/scroll", - json={"limit": 1000, "with_payload": True}, - timeout=30 - ) - points = r.json()["result"]["points"] - # Filter by timestamp - recent = [p for p in points if p.get("payload", {}).get("timestamp", "") > since] - return recent - except: - return [] - -def extract_gems(memories): - """Send to LLM for gem extraction.""" - if not memories: - return [] - - # Build conversation - parts = [] - for m in memories: - role = m["payload"].get("role", "unknown") - content = m["payload"].get("content", "")[:500] # Limit per message - parts.append(f"{role.upper()}: {content}") - - conversation = "\n\n".join(parts[:20]) # Max 20 messages - - prompt = f"""Extract 3-5 key gems (insights, decisions, facts) from this conversation. 
- -Conversation: -{conversation} - -Return JSON: [{{"text": "gem", "category": "decision|fact|preference"}}]""" - - try: - r = requests.post( - f"{OLLAMA}/v1/chat/completions", - json={ - "model": MODEL, - "messages": [{"role": "user", "content": prompt}], - "temperature": 0.3 - }, - timeout=120 - ) - content = r.json()["choices"][0]["message"]["content"] - - # Parse JSON - start = content.find('[') - end = content.rfind(']') - if start >= 0 and end > start: - return json.loads(content[start:end+1]) - except: - pass - return [] - -def store_gem(gem): - """Store gem to gems_tr.""" - try: - # Get embedding - r = requests.post( - f"{OLLAMA}/api/embeddings", - json={"model": "snowflake-arctic-embed2", "prompt": gem["text"]}, - timeout=30 - ) - vector = r.json()["embedding"] - - # Store - r = requests.put( - f"{QDRANT_URL}/collections/{GEMS}/points", - json={ - "points": [{ - "id": abs(hash(gem["text"])) % (2**63), - "vector": vector, - "payload": { - "text": gem["text"], - "category": gem.get("category", "other"), - "createdAt": datetime.now(timezone.utc).isoformat(), - "source": "turn_curator" - } - }] - }, - timeout=30 - ) - return r.status_code == 200 - except: - return False - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--threshold", "-t", type=int, default=10) - parser.add_argument("--execute", "-e", action="store_true") - parser.add_argument("--dry-run", "-n", action="store_true") - parser.add_argument("--status", "-s", action="store_true") - args = parser.parse_args() - - state = load_state() - current = get_total_count() - new_points = current - state.get("last_count", 0) - - if args.status: - print(f"Total memories: {current}") - print(f"Last curated: {state.get('last_count', 0)}") - print(f"New since last: {new_points}") - print(f"Threshold: {args.threshold}") - print(f"Ready: {'YES' if new_points >= args.threshold else 'NO'}") - return - - print(f"Curator: {new_points} new / {args.threshold} threshold") - - if new_points < args.threshold: - print("Not enough new memories") - return - - # Get recent memories (last hour should cover the new points) - memories = get_recent_memories(hours=1) - print(f"Fetched {len(memories)} recent memories") - - if not memories: - print("No memories to process") - return - - if args.dry_run: - print(f"[DRY RUN] Would process {len(memories)} memories") - return - - if not args.execute: - print("Use --execute to run or --dry-run to preview") - return - - # Extract gems - print("Extracting gems...") - gems = extract_gems(memories) - print(f"Extracted {len(gems)} gems") - - # Store - success = 0 - for gem in gems: - if store_gem(gem): - success += 1 - print(f" Stored: {gem['text'][:60]}...") - - # Update state - state["last_count"] = current - state["last_time"] = datetime.now(timezone.utc).isoformat() - save_state(state) - - print(f"Done: {success}/{len(gems)} gems stored") - -if __name__ == "__main__": - main() diff --git a/tr-continuous/curator_cron.sh b/tr-continuous/curator_cron.sh deleted file mode 100644 index 4c56984..0000000 --- a/tr-continuous/curator_cron.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -# Turn-based curator cron - runs every minute to check if 10 turns reached - -SCRIPT_DIR="/root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous" - -# Check if enough turns accumulated -/usr/bin/python3 "${SCRIPT_DIR}/curator_turn_based.py" --threshold 10 --status 2>/dev/null | grep -q "Ready to curate: YES" - -if [ $? 
-eq 0 ]; then - # Run curation - /usr/bin/python3 "${SCRIPT_DIR}/curator_turn_based.py" --threshold 10 --execute 2>&1 | logger -t turn-curator -fi diff --git a/tr-continuous/curator_turn_based.py b/tr-continuous/curator_turn_based.py deleted file mode 100644 index 658b578..0000000 --- a/tr-continuous/curator_turn_based.py +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env python3 -""" -Turn-Based Curator: Extract gems every N turns (instead of daily). - -Usage: - python3 curator_turn_based.py --threshold 10 --dry-run - python3 curator_turn_based.py --threshold 10 --execute - python3 curator_turn_based.py --status # Show turn counts - -This tracks turn count since last curation and runs when threshold is reached. -""" - -import argparse -import json -import os -import requests -import sys -from datetime import datetime, timezone, timedelta -from pathlib import Path -from typing import List, Dict, Any, Optional - -# Config -QDRANT_URL = "http://10.0.0.40:6333" -MEMORIES_COLLECTION = "memories_tr" -GEMS_COLLECTION = "gems_tr" -OLLAMA_URL = "http://10.0.0.10:11434" -CURATOR_MODEL = "ollama-remote/qwen3:30b-a3b-instruct-2507-q8_0" - -# State file tracks last curation -STATE_FILE = Path("/tmp/curator_turn_state.json") - -def get_curator_prompt(conversation_text: str) -> str: - """Generate prompt for gem extraction.""" - return f"""You are a memory curator. Extract only the most valuable gems (key insights) from this conversation. - -Rules: -1. Extract only genuinely important information (decisions, preferences, key facts) -2. Skip transient/trivial content (greetings, questions, temporary requests) -3. Each gem should be self-contained and useful for future context -4. Format: concise, factual statements -5. Max 3-5 gems total - -Conversation to curate: ---- -{conversation_text} ---- - -Return ONLY a JSON array of gems like: -[{{"text": "User decided to use X approach for Y", "category": "decision"}}] - -Categories: preference, fact, decision, entity, other - -JSON:""" - -def load_state() -> Dict[str, Any]: - """Load curation state.""" - if STATE_FILE.exists(): - try: - with open(STATE_FILE) as f: - return json.load(f) - except: - pass - return {"last_turn": 0, "last_curation": None} - -def save_state(state: Dict[str, Any]): - """Save curation state.""" - with open(STATE_FILE, 'w') as f: - json.dump(state, f, indent=2) - -def get_point_count_since(last_time: str) -> int: - """Get count of points since last curation time.""" - try: - response = requests.post( - f"{QDRANT_URL}/collections/{MEMORIES_COLLECTION}/points/count", - json={ - "filter": { - "must": [ - { - "key": "timestamp", - "range": { - "gt": last_time - } - } - ] - } - }, - timeout=30 - ) - response.raise_for_status() - return response.json().get("result", {}).get("count", 0) - except Exception as e: - print(f"Error getting count: {e}", file=sys.stderr) - return 0 - -def get_turns_since(last_turn: int, limit: int = 100) -> List[Dict[str, Any]]: - """Get all turns since last curation.""" - try: - response = requests.post( - f"{QDRANT_URL}/collections/{MEMORIES_COLLECTION}/points/scroll", - json={"limit": limit, "with_payload": True}, - timeout=30 - ) - response.raise_for_status() - data = response.json() - - turns = [] - for point in data.get("result", {}).get("points", []): - turn_num = point.get("payload", {}).get("turn", 0) - if turn_num > last_turn: - turns.append(point) - - # Sort by turn number - turns.sort(key=lambda x: x.get("payload", {}).get("turn", 0)) - return turns - except Exception as e: - print(f"Error fetching turns: 
{e}", file=sys.stderr) - return [] - -def extract_gems_with_llm(conversation_text: str) -> List[Dict[str, str]]: - """Send conversation to LLM for gem extraction.""" - prompt = get_curator_prompt(conversation_text) - - try: - response = requests.post( - f"{OLLAMA_URL}/v1/chat/completions", - json={ - "model": CURATOR_MODEL, - "messages": [{"role": "user", "content": prompt}], - "temperature": 0.3, - "max_tokens": 1000 - }, - timeout=120 - ) - response.raise_for_status() - data = response.json() - - content = data.get("choices", [{}])[0].get("message", {}).get("content", "[]") - - # Extract JSON from response - try: - # Try to find JSON array in response - start = content.find('[') - end = content.rfind(']') - if start != -1 and end != -1: - json_str = content[start:end+1] - gems = json.loads(json_str) - if isinstance(gems, list): - return gems - except: - pass - - return [] - except Exception as e: - print(f"Error calling LLM: {e}", file=sys.stderr) - return [] - -def store_gem(gem: Dict[str, str]) -> bool: - """Store a single gem to gems_tr.""" - try: - # Get embedding for gem - response = requests.post( - f"{OLLAMA_URL}/api/embeddings", - json={"model": "snowflake-arctic-embed2", "prompt": gem["text"]}, - timeout=30 - ) - response.raise_for_status() - vector = response.json().get("embedding", []) - - if not vector: - return False - - # Store to gems_tr - response = requests.put( - f"{QDRANT_URL}/collections/{GEMS_COLLECTION}/points", - json={ - "points": [{ - "id": hash(gem["text"]) % (2**63), - "vector": vector, - "payload": { - "text": gem["text"], - "category": gem.get("category", "other"), - "createdAt": datetime.now(timezone.utc).isoformat(), - "source": "turn_based_curator" - } - }] - }, - timeout=30 - ) - response.raise_for_status() - return True - except Exception as e: - print(f"Error storing gem: {e}", file=sys.stderr) - return False - -def main(): - parser = argparse.ArgumentParser(description="Turn-based curator") - parser.add_argument("--threshold", "-t", type=int, default=10, - help="Run curation every N turns (default: 10)") - parser.add_argument("--execute", "-e", action="store_true", - help="Execute curation") - parser.add_argument("--dry-run", "-n", action="store_true", - help="Preview what would be curated") - parser.add_argument("--status", "-s", action="store_true", - help="Show current turn status") - - args = parser.parse_args() - - # Load state - state = load_state() - current_turn = get_current_turn_count() - turns_since = current_turn - state["last_turn"] - - if args.status: - print(f"Current turn: {current_turn}") - print(f"Last curation: {state['last_turn']}") - print(f"Turns since last curation: {turns_since}") - print(f"Threshold: {args.threshold}") - print(f"Ready to curate: {'YES' if turns_since >= args.threshold else 'NO'}") - return - - print(f"Turn-based Curator") - print(f"Current turn: {current_turn}") - print(f"Last curation: {state['last_turn']}") - print(f"Turns since: {turns_since}") - print(f"Threshold: {args.threshold}") - print() - - if turns_since < args.threshold: - print(f"Not enough turns. 
Need {args.threshold}, have {turns_since}") - return - - # Get turns to process - print(f"Fetching {turns_since} turns...") - turns = get_turns_since(state["last_turn"], limit=turns_since + 10) - - if not turns: - print("No new turns found") - return - - # Build conversation text - conversation_parts = [] - for turn in turns: - role = turn.get("payload", {}).get("role", "unknown") - content = turn.get("payload", {}).get("content", "") - conversation_parts.append(f"{role.upper()}: {content}") - - conversation_text = "\n\n".join(conversation_parts) - - print(f"Processing {len(turns)} turns ({len(conversation_text)} chars)") - print() - - if args.dry_run: - print("=== CONVERSATION TEXT ===") - print(conversation_text[:500] + "..." if len(conversation_text) > 500 else conversation_text) - print() - print("[DRY RUN] Would extract gems and store to gems_tr") - return - - if not args.execute: - print("Use --execute to run curation or --dry-run to preview") - return - - # Extract gems - print("Extracting gems with LLM...") - gems = extract_gems_with_llm(conversation_text) - - if not gems: - print("No gems extracted") - return - - print(f"Extracted {len(gems)} gems:") - for i, gem in enumerate(gems, 1): - print(f" {i}. [{gem.get('category', 'other')}] {gem['text'][:80]}...") - print() - - # Store gems - print("Storing gems...") - success = 0 - for gem in gems: - if store_gem(gem): - success += 1 - - # Update state - state["last_turn"] = current_turn - state["last_curation"] = datetime.now(timezone.utc).isoformat() - save_state(state) - - print(f"Done! Stored {success}/{len(gems)} gems") - -if __name__ == "__main__": - main() diff --git a/tr-continuous/migrate_add_curated.py b/tr-continuous/migrate_add_curated.py deleted file mode 100755 index 00b968f..0000000 --- a/tr-continuous/migrate_add_curated.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -""" -Migration: Add 'curated: false' to existing memories_tr entries. - -Run once to update all existing memories for the new timer curator. 
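-
-Concretely, each batch becomes one request of the form below (a sketch; the
-point IDs are illustrative):
-
-    POST /collections/memories_tr/points/payload
-    {"points": [101, 102, 103], "payload": {"curated": false}}
-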
-Uses POST /collections/{name}/points/payload with {"points": [ids], "payload": {...}} -""" - -import requests -import time -import sys - -QDRANT_URL = "http://10.0.0.40:6333" -COLLECTION = "memories_tr" - -def update_existing_memories(): - """Add curated=false to all memories that don't have the field.""" - print("πŸ”§ Migrating existing memories...") - - offset = None - updated = 0 - batch_size = 100 - max_iterations = 200 - iterations = 0 - - while iterations < max_iterations: - iterations += 1 - - scroll_data = { - "limit": batch_size, - "with_payload": True - } - if offset: - scroll_data["offset"] = offset - - try: - response = requests.post( - f"{QDRANT_URL}/collections/{COLLECTION}/points/scroll", - json=scroll_data, - headers={"Content-Type": "application/json"}, - timeout=30 - ) - response.raise_for_status() - result = response.json() - points = result.get("result", {}).get("points", []) - - if not points: - break - - # Collect IDs that need curated=false - ids_to_update = [] - for point in points: - payload = point.get("payload", {}) - if "curated" not in payload: - ids_to_update.append(point["id"]) - - if ids_to_update: - # POST /points/payload with {"points": [ids], "payload": {...}} - update_response = requests.post( - f"{QDRANT_URL}/collections/{COLLECTION}/points/payload", - json={ - "points": ids_to_update, - "payload": {"curated": False} - }, - timeout=30 - ) - update_response.raise_for_status() - updated += len(ids_to_update) - print(f" Updated batch: {len(ids_to_update)} memories (total: {updated})") - time.sleep(0.05) - - offset = result.get("result", {}).get("next_page_offset") - if not offset: - break - - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - import traceback - traceback.print_exc() - break - - print(f"βœ… Migration complete: {updated} memories updated with curated=false") - -if __name__ == "__main__": - update_existing_memories() diff --git a/tr-continuous/turn-curator.service b/tr-continuous/turn-curator.service deleted file mode 100644 index 99530d1..0000000 --- a/tr-continuous/turn-curator.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=TrueRecall Turn-Based Curator (every 10 turns) -After=network.target mem-qdrant-watcher.service - -[Service] -Type=simple -User=root -WorkingDirectory=/root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous -ExecStart=/usr/bin/python3 /root/.openclaw/workspace/.projects/true-recall-v2/tr-continuous/curator_turn_based.py --threshold 10 --execute -Restart=on-failure -RestartSec=60 - -[Install] -WantedBy=multi-user.target diff --git a/tr-daily/__pycache__/curate_from_qdrant.cpython-312.pyc b/tr-daily/__pycache__/curate_from_qdrant.cpython-312.pyc deleted file mode 100644 index 0483a830e3702cbad2e6f970562c841c5012ae29..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15132 zcmb_@Yj7Lam1YBIyc#cp;G2}x6iJDc_%KP4lBkzyQL-KsC6m;6#Mnk4c9Q}H0(3VZ zi3s3j{Fso55>aus2#u{3Jeyg=Wo?D|F%_n29LKiO;y1TqEKh(#ZjXa*w zP}CB|QDYRR;j})@n1;NyV_Ndojp@jn9@E2H=QB(j#thTOF{1|QX`gA@JZ2_wy^oo; zj9I3wW7cWgm~GlVW+&-dpJU9y8OBOD-m2sA_a?Uzd!P)rA2`y)T zn;xs;9DF5L0zVsX;jMfnZ=29oLs#)PVc?uVQ_Yn=n^%;c=3GEu!-E-3GZ;kjrkGTMfDOT+Q3m*mk~wtG(PzQG!WJ@!LPnPPc}d z)y`^L>O3#Ov@@eZh;JL_$2~qDd!>s#8WKFgfY8Sd^BzuQCxpN>`w}O3{6Y4zciQWo z=BERKmlqjEA?Vt}UJ8T+kqu0+;{pE_UJyM&Z@|BKNg(t2_;Dm^W%z4B!80Bdv1|}N zLLMJG$xn-HNc8$A+1Ib~{_a**gdXuC8w{vj2|}mcg3{@$-ryy6`r1X0m*uA~@*D>R z#dd}{FM1~VK8A&7W;S>Ux-^bsU#?K87%TU$uvg*Gq83W1A%AUPD~ph~vu zxvt)}=epXux|v5f%R?<%*@QVk?=&x)G30n((9@!k>2sdor50MI2fboYrU$&^L0Q-D 
zpOsB#XK+qDzHs@aU~s0dqoeb=cKqA(TwiZ@ceiZYumG~9U=qR=8;f*4*Voy(r+ZJ> zNhYR^OiY{4BTVvPCz;f~Jr~=&ei7;z4~F|Ygg_wJ(H@xL{o_8*)sCwH;j%d68Rt9N zXN15Mtd`h;&9y0W-a%$EAhbc|^h~gQnv)Fz9}Efp@hp##mk~l8{2u=s1WS~e3If9^ zPf)AIIL*h|cn=k&BGi;#&B?!2&K=isFTECEM|0<>QqlFG{8w_|jHh76wI4~fv z;;b0tr`hcE$r@oOJn#puLEDwCXO{u_3Kvm!9X&tXKYDi9eQx;dnRBD=bN! z!MwP$yyR9UL#Tk}8sI0s4#6Cip^f^wb(=F)+7uU)w%r*@W3EfQo_3Wj_1^4#ci)#f z`2G?TKgZyA4=oU(^>q*GCneKXC@7USCmNErc2aP;zDU9UjRl`%j8N){cmP^`vt%Fj z{gPJQUzqn%clABSAM5r}e+9pl%KrVjPaGxvyY-)JV=%s3-``{YAxd+` zsGc)L4G}{`e;Mjq)Cj#117{9HeNh8vvVWQ-!@y z(~4}h{HA%2;%q^aS{}aax9Olo{hqR_vHUw?{5ZS8ryTiIHKuN2&cT(y7@c=ZKck6I!Ykug)80sque8p=Nu(M{DpRWW=W^%_Qmr9M zN9dw$mpt1amD2?6qUL%p@S!cD8=zkOCGeX)VlO(%+Wag;9T7)SnmTTaI_7A}CNx_M zXq=nSY%8EC-GqkaOln)3P-r8$b%~TjHfyUs-_|DeY%ieLtT*~d?z-8emWF&ST;u!Z zs4L>)nvmT@1H^WanCAMY5!+N#5k7ILtE{1}r)UbOc0NnR?R=ZM+nlG2*q*&Wum7Li z1)8g1sxBAJMW(jqqPYlMv>j1v#5%?1l5!EQiin~$K|I~b!&PrMUutga@;#fR@PdW2 z&-#XSgnBa@yQ;}ut1aP024%Y!`5=q3>jvQyMUjod>jd1o9#)*;$GsEYaS$(g`1W%w z3Tf@6A`%P}DF#GMQh^u{f;`7woZV16obIG3(zyL0kU0gV2AQ4?!GVt_iLAfk@r8J} zS#_8BSy?~r365WqX>4ui5eEbUp2v^yxEyWK$$B5i)_e54MfY&hSr4JTo&s?>LdgJEp{d z9|X^{BiI&p!Xq0mfh+*Rx@?%{K{)0f;c9-Qk&P&;20}p;=){l@-byQSi66vs=-VXk zR{Hbk2hjTv9<;yx^Z5T-xxpUI$@OfQVf}$1JAopGY*YtiC3oXAFQQ~FQ?kY51tDIX z3HU``X2wA}f!TI@f*{)Fr&e|pt}NcHj0iP=q>OPZ(Hs0h_a%>bsl_bQSW;O58e5aK z97dUueUbAV4)26) zH(|>S3d})Y08}^eaJx$QXHZPs4#6B1^W5QY@ls>YvXI=`x6=Q2$9{V3-j4hA$%BK^ zmNRqIW1ym1Gy-n!mNMB85_~@IMcF9uuZMV0PK?9Z2=mgwaR1P#`~2{r>>N3Je)#Ay z_tCS1gU61Jo;*7w(}chDTxfa*_z6^q0LOzMB>0FZsz+uP1-!v!1IJ^g&`&;qp?KgA zJ&!`DY(6nMdXC7WGEHjG&j`TZvH>_5$!Oe-EoEeJg(|GTqHGX1VQ2;p0k^`dLL=td zr#;u)Uf4in4~Q~T5Vt6MMk%R-@yaHZ5rqyYEIfxbXn0vqW?a^jiiK`WH7Q&yPD0N> z9~LKty;uU5QY?c252dFcvY2vt(Iphdp%6AI{vDhNAn}$^W=FbgOQLdlr_|iHYCVuH ztBFr0u1QV%R;~LpdYzNW==3hb+-V>!Et@-)wmIjHr^~D6hSJW`xl<3Avc)%&OjFua zy69PO%rg(ltI~CBx}yFwBVEbNTRyW=*3xw2uKCk3)2g-pp}p*ef5E?IZ(OxEt~;vd zeX+Jw^L}Y(-%7h=J-O~IU#huT6FZ%BHm^B5R-GM5XJ@*+L2BA7mF-KrsxxNF(e#+o zJD7Q0#$IYJU3XS4ZN0g5&AD^cxig_#9+sRtlg@n^%4(}wIk8^7|1T@OEw-%Rv%h4q^q|nnTddv+5p4IA}z9@Ixs$S#FTRmt znleKg%I26mmUh;DPF3l*&Fj;pl}jx*TVf~ULb7z%yb;Bgg zOjtu*!=2N&PABXu+GO2<&s^w0xK$=u+Vyhfdo-+fU-l~mk_EJW5EO{J10KRPrJ|HpC- z#w+v?7u-YApYbzzKdgcY;T?L4+*G zGnLbP)D+OB2$YH6pbhD6YlAr*2*3n7qJ>obZP3?1A2V@g(C3)<^>;1m=^xdBgm2H+ zrp7qyWjH{Ds$fpX;Xu}&Dx$%>N8Q${64IL*pvjF*t&5^^BL`W(n7+f=IfvS#uhMxy z-ziGnaS@uY(6OR=|30piw76%J7W1|Je`*o5&fE-6 z)#T?;jYafo|D%TBmRzbKn3H_fnH0AM>(zX&EHB|k3~CL9YX_Q~LC{4ElN!GUG%L{R zQ3}sPVpDj3J@tTO-5NC(v{D{1ZmhE)uOg!7D!Hl}f!|rmhKh zofmlpbw#u_i_R*DEP;XxWidraZhwT5EC?Yd%y>H{c|YE~e0WRe zis$-R;Z;Z%P$ml-vpo}7{L;Y}k#bi1N+Jx*lHF9=nfXHs7t`-wHl!ePk;w)qe zH!i9&n<&$O@q=^X7*(Hfh^Fcl-+j1>yvAi;4FvJ5yEN5?u~SB~>?~3(<8;`F!i*v}9_0Sl*DX zs7qJ%Ne2d{zBAGpk5q9nT~YgB+xGOfhR^i6D*I;^vz_@uZ!Iy*oygQtrjj*d?W(ah zCjQd6ecfIoZD~(j`%(0xXd)oh9!}biNTwqXY~?pzU3m37ZV<;JX>}_j-=E%bV zGE)rAEIeJ|w$*$krfI>wv+ zuB_Oi6}(U_*=2$sV*v=lrN>k=7dw$XzGQQ*6l_gI&IJJj5dzt9?92o^8XrXkZ_KA=%~!^w18yV@vyuxT~Px& z3OAF1Y^IVTo2g^j^b^~UZHa#;)gDROpO;L}Kd`uN99lT^og=XWtCpP-y;B%~7Emdk z-Tb8C=Cm=%e+2>>R4JQ;nQ>tBC`6QuA z0}#KVh5|t|CrVDCN#Lmmz5 z5&^7&n1DgN^&>>oRds1t(pFG)%2%H)7WF3w$$*i2J1H`-nuj|ENU`(ZK-SS6x<0` z$Bn}ahhw{z_49|5mfn@7Rm<}d{XAi_4IGxUT3&+nEab2Vm4|SnS`Z#l@F7nz)uI@u zY=Ap8c^&}%)1vxt8w@}k4aO1uUBky&(KM?2J?#|ZA6+5;fOV4wfZvRND#wGJlzGN- zYR&<_`H|VxG3MV>CHc4-1M##bst3Wnh}yzg2=J>1IM*E2zoDPC0-Q@?1)`@d+;j92 zKYp3@P5}Fg0EdFL8rc}YT{K(Hf-nlaE&{8w1%~f-0kQC~NyS2i9q=h2@-6FLJT?l) z+Ad7k4Z$PaazYyfL!&|$ypFdpg!hDFm~k9~7cn>if$SXZABGAFU@qYdX1;{MFa{$K zv=|DQ-H_$yS1^~zlSJHsy_*$x6xMf)<2c=1NWo`$;T902;3wiC0sL>PjOk)8Cv8nA 
zK3Mej_092_=6k*OnkDP$0yw5BVTX=2F*05@8h#y@pm26-Q-GkQ9 zsufe(Rfz`FDm0*0eTD%TP@OjpE*xC5)UR6VQ53MJE!G>o3%!dY-#)bNs!E$|OQS>ywIxqbhie>Rz0L_ z*mig%ADg$1MVoAd`b$le z4(3kC4TpjL55NtKdDH;I`i_m};=q9110>l6`!%Yx_W^y6e!c8OS8Us59 zcS_zrhWNzfZtZr3Nm09bqG4qt!OX7#zAO5@fUPPP3uPl2vm1#3pkOS}0>*AoWEg^e z=EV?@Gy0)IA`vLt2*nOr2g(iELBbN|qNvegEZ8LWT(8_j9l3*4-5A6$Ao2&Ez;ILG zqih)$aR4IsMKF+c-4Xr*sQwjx;@?03uqkK)b$5)njB!0cbyd5h%H4_1#K@0c{^;f9 z3jiRt9hF)K?)Bdr|1XoDOy2iO7rvG}{)%+$RjK^7xuJDiP3&;ewhMNgi87Zi?#<|Q z4#NY+2576pF!$mYR?1$L+m(uj0Weus!$>vuNDaNq;BavGfwc@ZjT$yxyB*NhDtm^a zD;*h~0p%exk%uZMYndWw$GcZ8J0yCCatFaEa!+nEtixOs-a*BLVgg}hWsl7~Xz*Ss zyj1{#DC%j0(AC|7TVfL`hO=nmYlLM|`!@JaU&Ivt1^!}-*Pzlgb1@xulzRn#J z8}eL1bYdaTrsd0mXu|kP8g+~t>MjrFfGRjXDhk1!0*I>USm030-s^CXyDT= ztYC{u_tXmR%BL5spt;BjF6LH{Dd@}Yh#p#QEo`|Uw+&oaimi1!*P-$TwDcSSE;sUa zo?kmRsiQ0JWV7k6>)xd7o_zb8mhIW3Z0}}euYZ37kK|US2#@S7wt`%3mPhs#&L*5w zDC0i$2!*?Ie|`-&?9RS?I(Hyyfu9wAwul+_<6E#7nP7`rZGyOwr&3hrg3bRzp(V_6J(E=B2f=q)753bm4&-!44x^CfkFkX8JsvS1xoR>$# zr2yR%1OUW90Kvz>NdlmAf*pGSEtly_yl*CKP&}5y2S-#_81zX&(J@Zzzu{rU(bTR&kLMq7JLT&6RZ|C&k3__LdXwJ87PRlQ8f!!4+Q)>gNoO7 zD?0@~F;F>w#mirX4jI7R8<2NWfxtp7FeY$GnH9sPrVDLN(``*$2$M5+e(+s(lbJz` zBZPNs=FYqCu;-M)qQ5wfrBBWJFwP#!A!Wo$09knJX6}6dd+aDWLkA0-(IFEz7F79{ z>~S94b+E!hXgli^M4&tYv5F9Cl)rud9hSJ#vSEWl5o*L_#V2MckS&#;;CA-dHE$3q zZx11(G0@=K>2iMK)6)W_nQgDt_K zNXsPbKM4s8%YrF^xG9D8EV%6Rp-!bSoE63C9>)i2gUAwXVHDhxRpx6ZF5)dvLkRVM z9AdT%7(1XoEEU?0Ik{1iin1_1&lQZ3RhoqvKd0~j{!f7%A*re_Y-ZZp+Q5}-nhn$7 zQ{NWWLmaF!e+Grax}lDK0cDi1^(d_GB>DdbeJpqk<-Wq!lhZTc9)i{j%<<;FpiIC& z;RVZ-k^>8x{enhwQ^b%dQpT0-@Qt_Ff#D16@cE%W7QCeiH>)$&9>VUx%6y_8RoE}!P6T=w zTnlUxjaH=4hN@wCnsZf)EkJ^@^KWloXO$`;0XG2j{Cf+^YNHWs9Gv)lfG5_sRLCaS z7HG2KWqZFc8JY%*-Z}gfWUGe*=d5g+%n%~{78O@2;<(4=7;i)_vv zOT;E6YrKkEl?U6nh`}5N$VzAx#Hj<#7Z(B|1_ePEkYN<zKAZzAwH@V%k!4 zyYl_RBhvP6iP@8)dq_efR4Fy?m6&}gdOxJR{UT_%dPcH#rMd^Cu4DH`B<5s_KDBPG zk2ikU`a$cmDQVq5cMM49PyIlQ^}HKi^u>2bOiPO11r|EA9ohG!Og(9nU8#v-Q*`~h zt1fmv-WBhUT}Wt>u3hM--?m_!*Uaz1k?y;>FLoU4XD)CVN|Z{jR&=c?U#hxU6>E%Z zlTJXd;^0ZtJa0f-lrcu%G2Jr7E0R^sQe{h`YPos2W~E!QA4!>>M*>IO5W5x^CHwAQ znOea)Mc)Qsn8`BV^HzAV|IIg$gn7R^7J9EQp}BQfs@aujlI+i=Or6;tA5PIn)-8?k z^GVC@xd9yN5ReDo+aKERADt0FZ5+hPYr&Qjx3}&|8lxZ*0 zSYso1UcU8m{FUXd| z*aL^_bIPKxn%6zF)Wo{u^oQ0DtSL(yu|Re$cmHJXkN2+h+-pj9pSbV3zg_A+BXtek zAD2ppQ`V8R)wTHg!r=UiX=~X-%hs3}ANX+agTa)gGi^mzw$ls8AEG7m_t%r9&QwBI zz^O^k%n!wSW6cuNkfIw2&+kg?O|<0p!2DpWB4(19`V_q#_Yu?dt1K)do7`?Z1KjSG zM%n0gbAfTUTd~bA!>K@QZL(eQoNUk9=@mekT;k*`1P9n4T2TZ<4`l;b)Zq#c3|JgN z69Lg40gXn&cQH7D0XZQFgeKxAIXZ(FL-1YJ!Re}a3E&}0jO4O@0vs)a3LXc#8AU)j znZ6hZ`0__6Im&QMe$O=Tc7H?>?1`Lp;A;v(Qpm?27XAr}BcLRXLYUEMG@8$7vxa_L zL20c2OgYHE@_(V4{~J~HFH~!iYW-)*4yTsAX^u&kHAs{*T~-g_1AwrZbZOI^BkgRQ zLyF2VjYPRJ2D7FvHu9K)SElNMM&pckXDECxHz@Cw#+Bf`qxW6+PW<$Z)xM#}Br|hO z+pZ~%k0u%u;_}hNwU63YoAzfYNL%6V?YXbJKXTvn$>G(*!;dj7b5Uc{G{e=|?5Nf3jE%+{;$pn{)@vCGK9|SIYlVA^U!Na*S)~&j-qSGtp8u~MFS)N diff --git a/tr-daily/curate_from_qdrant.py b/tr-daily/curate_from_qdrant.py deleted file mode 100644 index 726eed4..0000000 --- a/tr-daily/curate_from_qdrant.py +++ /dev/null @@ -1,358 +0,0 @@ -#!/usr/bin/env python3 -""" -True-Recall v2 Curator: Reads from Qdrant kimi_memories - -Reads 24 hours of conversation from Qdrant kimi_memories collection, -extracts contextual gems using qwen3, stores to Qdrant gems_tr with mxbai embeddings. 
- -Usage: - python curate_from_qdrant.py --user-id rob - python curate_from_qdrant.py --user-id rob --date 2026-02-23 -""" - -import json -import argparse -import requests -import urllib.request -from datetime import datetime, timedelta -from pathlib import Path -from typing import List, Dict, Any, Optional -import hashlib - -# Configuration -QDRANT_URL = "http://10.0.0.40:6333" -SOURCE_COLLECTION = "memories_tr" -TARGET_COLLECTION = "gems_tr" - -OLLAMA_URL = "http://10.0.0.10:11434" -EMBEDDING_MODEL = "mxbai-embed-large" -CURATION_MODEL = "qwen3:4b-instruct" - -# Load curator prompt -CURATOR_PROMPT_PATH = "/root/.openclaw/workspace/.projects/true-recall/curator-prompt.md" - - -def load_curator_prompt() -> str: - """Load the curator system prompt.""" - try: - with open(CURATOR_PROMPT_PATH, 'r') as f: - return f.read() - except FileNotFoundError: - # Fallback to v2 location - CURATOR_PROMPT_PATH_V2 = "/root/.openclaw/workspace/.projects/true-recall-v2/curator-prompt.md" - with open(CURATOR_PROMPT_PATH_V2, 'r') as f: - return f.read() - - -def get_turns_from_qdrant(user_id: str, date_str: str) -> List[Dict[str, Any]]: - """ - Get all conversation turns from Qdrant for a specific user and date. - - Returns turns sorted by conversation_id and turn_number. - """ - # Build filter for user_id and date - filter_data = { - "must": [ - {"key": "user_id", "match": {"value": user_id}}, - {"key": "date", "match": {"value": date_str}} - ] - } - - # Use scroll API to get all matching points - all_points = [] - offset = None - max_iterations = 100 # Safety limit - iterations = 0 - - while iterations < max_iterations: - iterations += 1 - scroll_data = { - "limit": 100, - "with_payload": True, - "filter": filter_data - } - - if offset: - scroll_data["offset"] = offset - - req = urllib.request.Request( - f"{QDRANT_URL}/collections/{SOURCE_COLLECTION}/points/scroll", - data=json.dumps(scroll_data).encode(), - headers={"Content-Type": "application/json"}, - method="POST" - ) - - try: - with urllib.request.urlopen(req, timeout=30) as response: - result = json.loads(response.read().decode()) - points = result.get("result", {}).get("points", []) - - if not points: - break - - all_points.extend(points) - - # Check if there's more - offset = result.get("result", {}).get("next_page_offset") - if not offset: - break - except urllib.error.HTTPError as e: - if e.code == 404: - print(f"⚠️ Collection {SOURCE_COLLECTION} not found") - return [] - raise - - # Convert points to turn format (harvested summaries) - turns = [] - for point in all_points: - payload = point.get("payload", {}) - - # Extract user and AI messages - user_msg = payload.get("user_message", "") - ai_msg = payload.get("ai_response", "") - - # Get timestamp from created_at - created_at = payload.get("created_at", "") - - turn = { - "turn": payload.get("turn_number", 0), - "user_id": payload.get("user_id", user_id), - "user": user_msg, - "ai": ai_msg, - "conversation_id": payload.get("conversation_id", ""), - "session_id": payload.get("session_id", ""), - "timestamp": created_at, - "date": payload.get("date", date_str), - "content_hash": payload.get("content_hash", "") - } - - # Skip if no content - if turn["user"] or turn["ai"]: - turns.append(turn) - - # Sort by conversation_id, then by turn number - turns.sort(key=lambda x: (x.get("conversation_id", ""), x.get("turn", 0))) - - return turns - - -def extract_gems_with_curator(turns: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Use qwen3 to extract gems from conversation turns.""" - if not turns: - 
-        return []
-
-    prompt = load_curator_prompt()
-
-    # Build the conversation input
-    conversation_json = json.dumps(turns, indent=2)
-
-    # Call Ollama with native system prompt
-    response = requests.post(
-        f"{OLLAMA_URL}/api/generate",
-        json={
-            "model": CURATION_MODEL,
-            "system": prompt,
-            "prompt": f"## Input Conversation\n\n```json\n{conversation_json}\n```\n\n## Output\n",
-            "stream": False,
-            "options": {
-                "temperature": 0.1,
-                "num_predict": 4000
-            }
-        }
-    )
-
-    if response.status_code != 200:
-        raise RuntimeError(f"Curation failed: {response.text}")
-
-    result = response.json()
-    output = result.get('response', '').strip()
-
-    # Extract JSON from output (handle markdown code blocks)
-    if '```json' in output:
-        output = output.split('```json')[1].split('```')[0].strip()
-    elif '```' in output:
-        output = output.split('```')[1].split('```')[0].strip()
-
-    try:
-        # Extract JSON array - find first [ and last ]
-        start_idx = output.find('[')
-        end_idx = output.rfind(']')
-        if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
-            output = output[start_idx:end_idx+1]
-
-        gems = json.loads(output)
-        if not isinstance(gems, list):
-            print(f"Warning: Curator returned non-list, wrapping: {type(gems)}")
-            gems = [gems] if gems else []
-        return gems
-    except json.JSONDecodeError as e:
-        print(f"Error parsing curator output: {e}")
-        print(f"Raw output: {output[:500]}...")
-        return []
-
-
-def get_embedding(text: str) -> List[float]:
-    """Get embedding vector from Ollama using mxbai-embed-large."""
-    response = requests.post(
-        f"{OLLAMA_URL}/api/embeddings",
-        json={
-            "model": EMBEDDING_MODEL,
-            "prompt": text
-        }
-    )
-
-    if response.status_code != 200:
-        raise RuntimeError(f"Embedding failed: {response.text}")
-
-    return response.json()['embedding']
-
-
-def get_gem_id(gem: Dict[str, Any], user_id: str) -> int:
-    """Generate a deterministic 63-bit integer ID for a gem."""
-    hash_bytes = hashlib.sha256(
-        f"{user_id}:{gem.get('conversation_id', '')}:{gem.get('turn_range', '')}".encode()
-    ).digest()[:8]
-    return int.from_bytes(hash_bytes, byteorder='big') % (2**63)
-
-
-def check_duplicate(gem: Dict[str, Any], user_id: str) -> bool:
-    """Check whether this gem's deterministic ID already exists in gems_tr."""
-    gem_id = get_gem_id(gem, user_id)
-
-    # Check if point exists
-    try:
-        req = urllib.request.Request(
-            f"{QDRANT_URL}/collections/{TARGET_COLLECTION}/points/{gem_id}",
-            headers={"Content-Type": "application/json"},
-            method="GET"
-        )
-        with urllib.request.urlopen(req, timeout=10) as response:
-            return True  # Point exists
-    except urllib.error.HTTPError as e:
-        if e.code == 404:
-            return False  # Point doesn't exist
-        raise
-
-
-def store_gem_to_qdrant(gem: Dict[str, Any], user_id: str) -> bool:
-    """Store a gem to Qdrant with embedding."""
-    # Create embedding from gem text
-    embedding_text = f"{gem.get('gem', '')} {gem.get('context', '')} {gem.get('snippet', '')}"
-    vector = get_embedding(embedding_text)
-
-    # Prepare payload
-    payload = {
-        "user_id": user_id,
-        **gem
-    }
-
-    # Generate deterministic integer ID
-    gem_id = get_gem_id(gem, user_id)
-
-    # Store to Qdrant
-    response = requests.put(
-        f"{QDRANT_URL}/collections/{TARGET_COLLECTION}/points",
-        json={
-            "points": [{
-                "id": gem_id,
-                "vector": vector,
-                "payload": payload
-            }]
-        }
-    )
-
-    return response.status_code == 200
-
-
-def main():
-    parser = argparse.ArgumentParser(description="True-Recall Curator v2 - Reads from Qdrant")
-    parser.add_argument("--user-id", required=True, help="User ID to process")
-    parser.add_argument("--date",
-                        help="Specific date to process (YYYY-MM-DD), defaults to yesterday")
-    parser.add_argument("--dry-run", action="store_true", help="Don't store, just preview")
-    args = parser.parse_args()
-
-    # Determine date (yesterday by default)
-    if args.date:
-        date_str = args.date
-    else:
-        yesterday = datetime.now() - timedelta(days=1)
-        date_str = yesterday.strftime("%Y-%m-%d")
-
-    print(f"🔍 True-Recall Curator v2 for {args.user_id}")
-    print(f"📅 Processing date: {date_str}")
-    print(f"🧠 Embedding model: {EMBEDDING_MODEL}")
-    print(f"💎 Target collection: {TARGET_COLLECTION}")
-    print()
-
-    # Get turns from Qdrant
-    print(f"📥 Fetching conversation turns from {SOURCE_COLLECTION}...")
-    turns = get_turns_from_qdrant(args.user_id, date_str)
-    print(f"✅ Found {len(turns)} turns")
-
-    if not turns:
-        print("⚠️ No turns to process. Exiting.")
-        return
-
-    # Show sample
-    print("\n📄 Sample turns:")
-    for i, turn in enumerate(turns[:3], 1):
-        user_msg = turn.get("user", "")[:60]
-        ai_msg = turn.get("ai", "")[:60]
-        print(f"  Turn {turn.get('turn')}: User: {user_msg}...")
-        print(f"    AI: {ai_msg}...")
-    if len(turns) > 3:
-        print(f"  ... and {len(turns) - 3} more")
-
-    # Extract gems
-    print("\n🧠 Extracting gems with The Curator (qwen3)...")
-    gems = extract_gems_with_curator(turns)
-    print(f"✅ Extracted {len(gems)} gems")
-
-    if not gems:
-        print("⚠️ No gems extracted. Exiting.")
-        return
-
-    # Preview gems
-    print("\n💎 Preview of extracted gems:")
-    for i, gem in enumerate(gems[:3], 1):
-        print(f"\n--- Gem {i} ---")
-        print(f"Gem: {gem.get('gem', 'N/A')[:100]}...")
-        print(f"Categories: {gem.get('categories', [])}")
-        print(f"Importance: {gem.get('importance', 'N/A')}")
-        print(f"Confidence: {gem.get('confidence', 'N/A')}")
-
-    if len(gems) > 3:
-        print(f"\n... and {len(gems) - 3} more gems")
-
-    if args.dry_run:
-        print("\n🏃 DRY RUN: Not storing gems.")
-        return
-
-    # Check for duplicates and store
-    print("\n💾 Storing gems to Qdrant...")
-    stored = 0
-    skipped = 0
-    failed = 0
-
-    for gem in gems:
-        # Check for duplicates
-        if check_duplicate(gem, args.user_id):
-            print(f"  ⏭️ Skipping duplicate: {gem.get('gem', 'N/A')[:50]}...")
-            skipped += 1
-            continue
-
-        if store_gem_to_qdrant(gem, args.user_id):
-            stored += 1
-        else:
-            print(f"  ⚠️ Failed to store gem: {gem.get('gem', 'N/A')[:50]}...")
-            failed += 1
-
-    print(f"\n✅ Stored: {stored}")
-    print(f"⏭️ Skipped (duplicates): {skipped}")
-    print(f"❌ Failed: {failed}")
-
-    print("\n🎉 Curation complete!")
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
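
Note on the removed tr-daily curator, for anyone rebuilding it: its idempotency
hinged on get_gem_id deriving each gem's Qdrant point ID deterministically from
a SHA-256 hash of user_id, conversation_id, and turn_range, so re-running the
same day resolves to the same point IDs and upserts rather than duplicating.
A minimal standalone sketch of that scheme (the field values below are
hypothetical examples, not data from the deleted collections):

    import hashlib

    def gem_point_id(user_id: str, conversation_id: str, turn_range: str) -> int:
        """Derive a stable Qdrant point ID from a gem's identity fields."""
        key = f"{user_id}:{conversation_id}:{turn_range}".encode()
        # First 8 digest bytes -> unsigned int, reduced into the 63-bit range
        # so the ID is safe to use as an unsigned integer point ID in Qdrant.
        return int.from_bytes(hashlib.sha256(key).digest()[:8], byteorder="big") % (2**63)

    # Identical inputs always yield the same ID, so a second run either skips
    # the existing point (the check_duplicate path) or overwrites it via PUT.
    assert gem_point_id("rob", "conv-01", "12-14") == gem_point_id("rob", "conv-01", "12-14")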