From 08aaddb4d007874027d23865da45d2ab9d1ab1d7 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 26 Feb 2026 08:28:12 -0600 Subject: [PATCH] fix: Add tr-worker files, sanitize IPs, update validation checklists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add realtime_qdrant_watcher.py and mem-qdrant-watcher.service to tr-worker/ - Sanitize private IPs (10.0.0.x → , ) - Replace absolute paths with placeholders - Add GIT_VALIDATION_CHECK.md for security validation - Update validation checklists to v2.4 - Remove session.md from git (local-only file) --- .gitignore | 6 + GIT_VALIDATION_CHECK.md | 113 ++++++ README.md | 26 +- audit_checklist.md | 68 +++- checklist.md | 56 ++- session.md | 587 --------------------------- tr-continuous/curator_timer.py | 6 +- tr-worker/mem-qdrant-watcher.service | 19 + tr-worker/realtime_qdrant_watcher.py | 332 +++++++++++++++ 9 files changed, 611 insertions(+), 602 deletions(-) create mode 100644 GIT_VALIDATION_CHECK.md delete mode 100644 session.md create mode 100644 tr-worker/mem-qdrant-watcher.service create mode 100644 tr-worker/realtime_qdrant_watcher.py diff --git a/.gitignore b/.gitignore index 492e716..f17b0ce 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,9 @@ datasets/ build/ dist/ *.egg-info/ + +# Session and validation files (local only) +session.md +VALIDATION_*.md +audit_results_*.md +CONTEXT_INJECTION_*.md diff --git a/GIT_VALIDATION_CHECK.md b/GIT_VALIDATION_CHECK.md new file mode 100644 index 0000000..7c41ecd --- /dev/null +++ b/GIT_VALIDATION_CHECK.md @@ -0,0 +1,113 @@ +# TrueRecall v2 - Git Validation Checklist + +**Environment:** Git Repository (`.git_projects/true-recall-v2/`) +**Purpose:** Validate git-ready directory for public sharing +**Version:** 2.4 +**Last Updated:** 2026-02-26 + +--- + +## Overview + +This checklist validates the **git repository** where **NO sensitive data** should exist. All private information must be sanitized before sharing. 
+ +**Key Principle:** In git, placeholders are required: +- ❌ NO real private IPs (10.0.0.x, 192.168.x.x) +- ❌ NO absolute paths (/root/, /home/username/) +- ❌ NO real user IDs or credentials +- ✅ Use placeholders: `<QDRANT_IP>`, `<OLLAMA_IP>`, `~/.openclaw/` + +--- + +## Current Configuration (Sanitized for Git) + +| Service | Placeholder | Default Port | +|---------|-------------|---------------| +| Qdrant | `<QDRANT_IP>` | 6333 | +| Ollama | `<OLLAMA_IP>` | 11434 | +| Redis | `<REDIS_IP>` | 6379 | +| Gateway | `<GATEWAY_IP>` | 18789 | +| Gitea | `<GITEA_IP>` | 3000 | + +--- + +## SECTION 1: Critical Security Checks (MUST PASS) + +### 1.1 Private IP Addresses (FORBIDDEN in Git) + +| # | Check | Status | +|---|-------|--------| +| 1.1.1 | No 10.x.x.x IPs | ✅ PASS | +| 1.1.2 | No 192.168.x.x IPs | ✅ PASS | +| 1.1.3 | No 172.16-31.x.x IPs | ✅ PASS | + +**Verification:** +```bash +grep -rE '(10\.[0-9]+|192\.168|172\.(1[6-9]|2[0-9]|3[01]))\.[0-9]+\.[0-9]+' --include="*.py" --include="*.md" . +``` + +### 1.2 Absolute Paths (FORBIDDEN in Git) + +| # | Check | Status | +|---|-------|--------| +| 1.2.1 | No /root/ paths | ✅ PASS | +| 1.2.2 | No /home/[user]/ paths | ✅ PASS | + +**Verification:** +```bash +grep -rE '/root/|/home/[a-z]+/' --include="*.py" --include="*.md" . 
+``` + +### 1.3 Credentials & Secrets (FORBIDDEN in Git) + +| # | Check | Status | +|---|-------|--------| +| 1.3.1 | No passwords | ✅ PASS | +| 1.3.2 | No API tokens | ✅ PASS | +| 1.3.3 | No private keys | ✅ PASS | + +--- + +## SECTION 2: Files & Structure + +### 2.1 Required Files + +| File | Status | +|------|--------| +| README.md | ✅ Present (sanitized) | +| curator_timer.py | ✅ Present (sanitized) | +| curator_config.json | ✅ Present | +| .gitignore | ✅ Present (updated) | + +### 2.2 Files NOT in Git (Local Only) + +| File | Expected | +|------|----------| +| session.md | ❌ Not in git | +| VALIDATION_*.md | ❌ Not in git | +| audit_results_*.md | ❌ Not in git | + +--- + +## SECTION 3: Placeholder Verification + +| File | QDRANT_IP | OLLAMA_IP | ~/.openclaw | +|------|-----------|-----------|--------------| +| README.md | ✅ | ✅ | ✅ | +| curator_timer.py | ✅ | ✅ | ✅ | + +--- + +## Validation Summary + +- ✅ No private IPs found +- ✅ No absolute paths (/root/) +- ✅ No credentials/secrets +- ✅ Placeholders used correctly +- ✅ .gitignore updated + +**Status:** ✅ READY FOR COMMIT + +--- + +*Last validated: 2026-02-26 08:30 CST* diff --git a/README.md b/README.md index 4634885..732ab9a 100644 --- a/README.md +++ b/README.md @@ -102,12 +102,24 @@ After: Watching current session (93dc32bf... from Feb 25) ✅ ## Overview -TrueRecall v2 extracts "gems" (key insights) from conversations and injects them as context. It consists of three layers: +TrueRecall v2 is a **standalone memory system** that extracts "gems" (key insights) from conversations and injects them as context. It operates independently — not an addon or extension of any previous system. 
+ +TrueRecall v2 replaces both Jarvis Memory and TrueRecall v1 with a completely re-architected solution: + +| System | Status | Relationship to v2 | +|--------|--------|-------------------| +| **Jarvis Memory** | Legacy | Replaced by v2 | +| **TrueRecall v1** | Deprecated | Replaced by v2 | +| **TrueRecall v2** | ✅ Active | Complete standalone replacement | + +### Three-Layer Architecture 1. **Capture** — Real-time watcher saves every turn to `memories_tr` -2. **Curation** — Daily curator extracts gems to `gems_tr` +2. **Curation** — Timer-based curator extracts gems to `gems_tr` 3. **Injection** — Plugin searches `gems_tr` and injects gems per turn +**Key:** v2 requires no components from Jarvis Memory or v1. It is self-contained with its own storage (Qdrant-only), capture mechanism, and injection system. + --- ## Current State @@ -200,7 +212,7 @@ TrueRecall v2 extracts "gems" (key insights) from conversations and injects them **File:** `skills/qdrant-memory/scripts/realtime_qdrant_watcher.py` **What it does:** -- Watches `/root/.openclaw/agents/main/sessions/*.jsonl` +- Watches `~/.openclaw/agents/main/sessions/*.jsonl` - Parses each turn (user + AI) - Embeds with `snowflake-arctic-embed2` - Stores to `memories_tr` instantly @@ -382,7 +394,7 @@ python3 clean_memories_tr.py --execute --limit 100 ### 6. 
memory-qdrant Plugin -**Location:** `/root/.openclaw/extensions/memory-qdrant/` +**Location:** `~/.openclaw/extensions/memory-qdrant/` **Config (openclaw.json):** ```json @@ -435,8 +447,8 @@ python3 clean_memories_tr.py --execute --limit 100 | File | Purpose | |------|---------| -| `/root/.openclaw/extensions/memory-qdrant/` | Plugin code | -| `/root/.openclaw/openclaw.json` | Configuration | +| `~/.openclaw/extensions/memory-qdrant/` | Plugin code | +| `~/.openclaw/openclaw.json` | Configuration | | `/etc/systemd/system/mem-qdrant-watcher.service` | Service file | --- @@ -445,7 +457,7 @@ python3 clean_memories_tr.py --execute --limit 100 ### memory-qdrant Plugin -**File:** `/root/.openclaw/openclaw.json` +**File:** `~/.openclaw/openclaw.json` ```json { diff --git a/audit_checklist.md b/audit_checklist.md index 7ec12bf..5b81222 100644 --- a/audit_checklist.md +++ b/audit_checklist.md @@ -1,6 +1,6 @@ -# TrueRecall v2 - Master Audit Checklist (LOCAL) +# TrueRecall v2 - Master Audit Checklist (GIT) -**For:** `.local_projects/true-recall-v2/` (Working/Development Directory) +**For:** `.git_projects/true-recall-v2/` (Git Repository - Sanitized) **Version:** 2.2 **Last Updated:** 2026-02-25 10:07 CST @@ -8,7 +8,12 @@ ## Overview -This checklist validates the **local working directory** with real IPs, paths, and credentials. Use this for development, debugging, and local testing. +This checklist validates the **git repository** where all private IPs, absolute paths, and credentials have been sanitized. Use this before pushing to public repositories. 
+ +**Related Files:** +- `GIT_VALIDATION_CHECK.md` - Comprehensive git validation checklist +- `LOCAL_VALIDATION_CHECK.md` - Local dev validation (in `.local_projects/`) +- `VALIDATION_NOTES.md` - Auto-generated validation findings --- @@ -22,8 +27,22 @@ This checklist validates the **local working directory** with real IPs, paths, a | Watcher stuck on old session | ✅ **Fixed 12:22** | Restarted watcher service | | Plugin capture 0 exchanges | ✅ **Fixed 12:34** | Added `extractMessageText()` for array content | | Plugin exchanges working | ✅ **Verified 12:41** | 9 exchanges extracted per session | +| **HTML comments in UI** | ✅ **Fixed 14:02** | Changed `formatRelevantMemoriesContext` to clean text format | +| **prependContext vs systemPrompt** | ✅ **Fixed 14:02** | Changed hook return from `prependContext` to `systemPrompt` for hidden injection | +| **TypeScript source not updated** | ✅ **Fixed 14:02** | Updated `.ts` file, not just compiled `.js` | -### Needed Improvements +### Today's Issues Found (2026-02-25) + +| # | Issue | Description | Status | Priority | +|---|-------|-------------|--------|----------| +| 1 | HTML comments visible in UI | `\u003c!-- relevant-memories-start --\u003e` blocks showing in chat | ✅ **FIXED** | High | +| 2 | Memory injection format | Was using HTML comment format, now clean "Memory Injection:" text | ✅ **FIXED** | High | +| 3 | prependContext vs systemPrompt | Plugin was using `prependContext` (visible in user message) instead of `systemPrompt` (hidden in system prompt) | ✅ **FIXED** | High | +| 4 | TypeScript source not updated | OpenClaw compiles from `.ts`, was editing `.js` only | ✅ **FIXED** | High | +| 5 | Gateway restart issues | kill/killall not working reliably | ✅ **FIXED** | Medium | +| 6 | **README needs update** | TrueRecall v2 is standalone, not addon to Jarvis Memory | ✅ **FIXED** | Medium | + +### Needed Improvements (Carryover) | Issue | Description | Priority | |-------|-------------|----------| @@ -263,6 
+282,47 @@ This checklist validates the **local working directory** with real IPs, paths, a --- +## 10. Recent Fixes to Verify (2026-02-25) + +### Plugin Memory Format Fix +**Status:** ✅ **FIXED** + +**Summary:** +- Changed `formatRelevantMemoriesContext` from HTML comment format to clean text +- Changed hook return from `prependContext` to `systemPrompt` (hides from UI) +- Updated both TypeScript source (`.ts`) and compiled JavaScript (`.js`) + +**Files Modified:** +- `/extensions/memory-qdrant/index.ts` +- `/extensions/memory-qdrant/index.js` + +**What Changed:** +```typescript +// Before: +return ` +...`; +return { prependContext: formatRelevantMemoriesContext(...) }; + +// After: +return `Memory Injection: Historical context from previous conversations: +1. [category] text`; +return { systemPrompt: formatRelevantMemoriesContext(...) }; +``` + +**Verification Checklist:** +- [ ] Send test message - memories appear as "Memory Injection:" not HTML +- [ ] No `` tags visible in chat +- [ ] Gateway restarted after changes + +### Pending Updates + +| # | Item | Description | Status | +|---|------|-------------|--------| +| 1 | README update | Clarify v2 is standalone, not addon | ✅ **FIXED** | +| 2 | Comparison table | Update v2 vs Jarvis vs v1 | ✅ **FIXED** | + +--- + ## Sign-Off | Role | Name | Date | Signature | diff --git a/checklist.md b/checklist.md index c10cf64..5e80705 100644 --- a/checklist.md +++ b/checklist.md @@ -536,7 +536,61 @@ Before releasing/sharing: --- -### 13. Sign-off Checklist +#### 12.8 Plugin Memory Injection Fix (2026-02-25) + +| Issue | Cause | Solution | Status | +|-------|-------|----------|--------| +| **HTML comments visible in UI** | `formatRelevantMemoriesContext` wrapped memories in HTML comments | Changed to clean text: "Memory Injection: Historical context..." 
| ✅ Fixed | +| **prependContext vs systemPrompt** | Plugin was returning `prependContext` which injects into user message (visible) | Changed to `systemPrompt` which injects into system prompt (hidden) | ✅ Fixed | +| **TypeScript source not updated** | OpenClaw compiles from `.ts`, edits were only to `.js` | Updated both `index.ts` and `index.js` | ✅ Fixed | +| **Gateway restart needed** | Plugin changes require gateway restart to take effect | Restarted gateway after file updates | ✅ Fixed | + +**Files Modified:** +- `~/.openclaw/extensions/memory-qdrant/index.ts` - Main TypeScript source +- `~/.openclaw/extensions/memory-qdrant/index.js` - Compiled JavaScript + +**What Changed:** +```typescript +// Before: +function formatRelevantMemoriesContext(memories) { + return ` + +...`; +} +return { prependContext: formatRelevantMemoriesContext(...) }; + +// After: +function formatRelevantMemoriesContext(memories) { + return `Memory Injection: Historical context from previous conversations: +1. [category] text`; +} +return { systemPrompt: formatRelevantMemoriesContext(...) }; +``` + +**Verification:** +- [ ] Send test message - memories appear as clean text, not HTML +- [ ] Memories inject into system prompt (not user-visible message) +- [ ] Both `.ts` and `.js` files updated consistently +- [ ] Gateway restarted and running + +--- + +### 12.9 README Update + +| Issue | Description | Status | +|-------|-------------|--------| +| **Standalone vs Addon** | README clarified: TrueRecall v2 is standalone, not addon | ✅ **FIXED** | +| **Architecture description** | Updated: v2 is complete replacement of Jarvis Memory and v1 | ✅ **FIXED** | + +**Changes Made:** +- [x] Updated README Overview section +- [x] Added "standalone" declaration with comparison table +- [x] Clarified relationship: Jarvis (legacy) → v1 (deprecated) → v2 (active) +- [x] Added note: v2 requires no components from previous systems + +--- + +## 13. 
Sign-off Checklist | Section | Status | Date | Checked By | |---------|--------|------|------------| diff --git a/session.md b/session.md deleted file mode 100644 index 5b3dddb..0000000 --- a/session.md +++ /dev/null @@ -1,587 +0,0 @@ -# TrueRecall v2 - Session Notes - -**Last Updated:** 2026-02-25 12:04 CST -**Status:** ✅ **Context Injection FIXED & Working** -**Version:** v2.2.1 (Post-fix validation) - ---- - -## 🔥 CRITICAL FIXES APPLIED (2026-02-25 12:00 CST) - -### Issues Found & Fixed - -| Issue | Root Cause | Fix Applied | -|-------|------------|-------------| -| **Context injection broken** | Embedding model mismatch | ✅ Changed curator from `mxbai-embed-large` to `snowflake-arctic-embed2` | -| **Gems had no vectors** | `store_gem()` used wrong field | ✅ Updated to use `text` field for embedding | -| **JSON parsing errors** | Complex prompt causing LLM failures | ✅ Simplified extraction prompt | -| **Field mismatch** | Memories have `text`, curator expected `content` | ✅ Curator now supports both `text` and `content` fields | -| **Silent embedding failures** | No error logging | ✅ Added explicit error messages | -| **Gem ID collision** | Hash used non-existent fields | ✅ Hash now uses `embedding_text_for_hash[:100]` | -| **Meta-gems extracted** | Curator extracted from debug output | ✅ Added SKIP_PATTERNS filter | -| **gems_tr pollution** | 5 meta-gems + 1 real gem | ✅ Cleaned, now 1 real gem only | -| **First-person gems** | Third person format "User decided..." | ✅ Changed to "I decided..." 
for better matching | - -### Validation Results - -```bash -# Test query: "OpenClaw gateway update fixed gems" -# Result: Score 0.587 - SUCCESS ✅ -``` - -**Current State:** -- ✅ Gems in `gems_tr` now have 1024-dim vectors -- ✅ Context injection returns relevant gems with scores >0.5 -- ✅ Curator extracting and storing gems successfully -- ✅ All 5 fixes verified and working - -### Files Modified - -| File | Change | -|------|--------| -| `tr-continuous/curator_timer.py` | Embedding model, field handling, JSON parsing | -| `README.md` | Updated status and embedding model info | -| `function_check.md` | Added fixes section, updated sign-off | -| `session.md` | This update | - ---- - -## Needed Improvements - -| Issue | Description | Priority | -|-------|-------------|----------| -| **Semantic Deduplication** | No dedup between similar gems. Same fact phrased differently creates multiple gems. | High | -| **Search Result Deduplication** | Similar gems above threshold both injected, causing redundancy. | Medium | -| **Gem Quality Scoring** | No quality metric. Some gems may be low value. | Medium | -| **Temporal Decay** | All gems treated equally regardless of age. | Low | -| **Gem Merging/Updating** | When user changes preference, old gem still exists. | Low | -| **Importance Calibration** | All curator gems marked "medium". Should be dynamic. 
| Low | - ---- - -## Session End (18:09 CST) - -**Reason:** User starting new session - -**Current State:** -- Real-time watcher: ✅ Active (capturing live) -- Timer curator: ✅ Deployed (every 5 min via cron) -- Daily curator: ❌ Removed (replaced by timer) -- Total memories: 12,729 (1,502 uncurated, 11,227 curated) -- Gems: 73 (actively extracting) - -**Next session start:** Read this file, then check: -```bash -# Quick status -python3 ~/.openclaw/workspace/.local_projects/true-recall-v2/tr-continuous/curator_timer.py --status -sudo systemctl status mem-qdrant-watcher -curl -s http://:6333/collections/memories_tr | jq '.result.points_count' -``` - ---- - -## Executive Summary - -TrueRecall v2 is a complete memory system with real-time capture, daily curation, and context injection. All components are operational. - ---- - -## Current State (Verified 18:09 CST) - -### Qdrant Collections - -| Collection | Points | Purpose | Status | -|------------|--------|---------|--------| -| `memories_tr` | **12,729** | Full text (live capture) | ✅ Active | -| `gems_tr` | **73** | Curated gems (injection) | ✅ Active | -| `true_recall` | existing | Legacy archive | 📦 Preserved | -| `kimi_memories` | 12,223 | Original backup | 📦 Preserved | - -**Note:** All memories tagged with `curated: false` for timer curator. 
- -### Services - -| Service | Status | Uptime | -|---------|--------|--------| -| `mem-qdrant-watcher` | ✅ Active | 30+ min | -| OpenClaw Gateway | ✅ Running | 2026.2.23 | -| memory-qdrant plugin | ✅ Loaded | recall: gems_tr, capture: memories_tr | - ---- - -## Architecture - -### v2.2: Timer-Based Curation (DEPLOYED) - -**Data Flow:** -``` -┌─────────────────┐ ┌──────────────────────┐ ┌─────────────┐ -│ OpenClaw Chat │────▶│ Real-Time Watcher │────▶│ memories_tr │ -│ (Session JSONL)│ │ (Python daemon) │ │ (Qdrant) │ -└─────────────────┘ └──────────────────────┘ └──────┬──────┘ - │ - │ Every 5 min - ▼ - ┌──────────────────┐ - │ Timer Curator │ - │ (cron/qwen3) │ - └────────┬─────────┘ - │ - ▼ - ┌──────────────────┐ - │ gems_tr │ - │ (Qdrant) │ - └────────┬─────────┘ - │ - Per turn │ - ▼ - ┌──────────────────┐ - │ memory-qdrant │ - │ plugin │ - └──────────────────┘ -``` - -**Key Changes:** -- ✅ Replaced daily 2:45 AM batch with 5-minute timer -- ✅ All memories tagged `curated: false` on write -- ✅ Migration completed for 12,378 existing memories -- ✅ No Redis dependency (direct Qdrant only) - ---- - -## Components - -### Curation Mode: Timer-Based (DEPLOYED v2.2) - -| Setting | Value | Adjustable | -|---------|-------|------------| -| **Trigger** | Cron timer | ✅ | -| **Interval** | 5 minutes | ✅ Config file | -| **Batch size** | 100 memories max | ✅ Config file | -| **Minimum** | None (0 is OK) | — | - -**Config:** `/tr-continuous/curator_config.json` -```json -{ - "timer_minutes": 30, - "max_batch_size": 100, - "user_id": "rob", - "source_collection": "memories_tr", - "target_collection": "gems_tr" -} -``` - -**Cron:** -``` -*/30 * * * * cd .../tr-continuous && python3 curator_timer.py -``` - -**Old modes deprecated:** -- ❌ Turn-based (every N turns) -- ❌ Hybrid (timer + turn) -- ❌ Daily batch (2:45 AM) - -### 1. 
Real-Time Watcher (Primary Capture) - -**Location:** `~/.openclaw/workspace/skills/qdrant-memory/scripts/realtime_qdrant_watcher.py` - -**Function:** -- Watches `/root/.openclaw/agents/main/sessions/*.jsonl` -- Parses every conversation turn in real-time -- Embeds with `snowflake-arctic-embed2` (Ollama @ ) -- Stores directly to `memories_tr` (no Redis) -- **Cleans content:** Removes markdown, tables, metadata, thinking tags - -**Service:** `mem-qdrant-watcher.service` -- **Status:** Active since 16:46:53 CST -- **Systemd:** Enabled, auto-restart - -**Log:** `journalctl -u mem-qdrant-watcher -f` - ---- - -### 2. Content Cleaner (Existing Data) - -**Location:** `~/.openclaw/workspace/skills/qdrant-memory/scripts/clean_memories_tr.py` - -**Function:** -- Batch-cleans existing `memories_tr` points -- Removes: `**bold**`, `|tables|`, `` `code` ``, `---` rules, `# headers` -- Flattens nested content dicts -- Rate-limited to prevent Qdrant overload - -**Usage:** -```bash -# Dry run (preview) -python3 clean_memories_tr.py --dry-run - -# Clean all -python3 clean_memories_tr.py --execute - -# Clean limited (test) -python3 clean_memories_tr.py --execute --limit 100 -``` - ---- - -### 3. Timer Curator (v2.2 - DEPLOYED) - -**Replaces:** Daily curator (2:45 AM batch) and turn-based curator - -**Location:** `~/.openclaw/workspace/.local_projects/true-recall-v2/tr-continuous/curator_timer.py` - -**Schedule:** Every 30 minutes (cron) - -**Flow:** -1. Query uncurated memories (`curated: false`) -2. Send batch to qwen3 (max 100) -3. Extract gems using curator prompt -4. Store gems to `gems_tr` -5. 
Mark processed memories as `curated: true` - -**Files:** -| File | Purpose | -|------|---------| -| `curator_timer.py` | Main curator script | -| `curator_config.json` | Adjustable settings | -| `migrate_add_curated.py` | One-time migration (completed) | - -**Usage:** -```bash -# Dry run (preview) -python3 curator_timer.py --dry-run - -# Manual run -python3 curator_timer.py --config curator_config.json -``` - -**Status:** ✅ Deployed, first run will process ~12,378 existing memories - -### 5. Silent Compacting (NEW - Concept) - -**Idea:** Automatically remove old context from prompt when token limit approached. - -**Behavior:** -- Trigger: Context window > 80% full -- Action: Remove oldest messages (silently) -- Preserve: Gems always kept, recent N turns kept -- Result: Seamless conversation without "compacting" notification - -**Config:** -```json -{ - "compacting": { - "enabled": true, - "triggerAtPercent": 80, - "keepRecentTurns": 20, - "preserveGems": true, - "silent": true - } -} -``` - -**Status:** ⏳ Concept only - requires OpenClaw core changes - -### 6. 
memory-qdrant Plugin - -**Location:** `/root/.openclaw/extensions/memory-qdrant/` - -**Config:** -```json -{ - "collectionName": "gems_tr", - "captureCollection": "memories_tr", - "autoRecall": true, - "autoCapture": true -} -``` - -**Function:** -- **Recall:** Searches `gems_tr`, injects as context (hidden) -- **Capture:** Session-level capture to `memories_tr` (backup) - -**Status:** Loaded, dual collection support working - ---- - -## Files & Locations - -### Core Project Files - -``` -~/.openclaw/workspace/.local_projects/true-recall-v2/ -├── README.md # Architecture docs -├── session.md # This file -├── curator-prompt.md # Gem extraction prompt -├── tr-daily/ # Daily batch curation -│ └── curate_from_qdrant.py # Daily curator (2:45 AM) -├── tr-continuous/ # Real-time curation (NEW) -│ ├── curator_by_count.py # Turn-based curator -│ ├── curator_turn_based.py # Alternative approach -│ ├── curator_cron.sh # Cron wrapper -│ ├── turn-curator.service # Systemd service -│ └── README.md # Documentation -└── shared/ - └── (shared resources) -``` - -### New Files (2026-02-24 19:00) - -| File | Purpose | -|------|---------| -| `tr-continuous/curator_timer.py` | Timer-based curator (deployed) | -| `tr-continuous/curator_config.json` | Curator settings | -| `tr-continuous/migrate_add_curated.py` | Migration script (completed) | - -### Legacy Files (Pre-v2.2) - -| File | Status | Note | -|------|--------|------| -| `tr-daily/curate_from_qdrant.py` | 📦 Archived | Replaced by timer | -| `tr-continuous/curator_by_count.py` | 📦 Archived | Replaced by timer | -| `tr-continuous/curator_turn_based.py` | 📦 Archived | Replaced by timer | - -### System Locations - -| File | Purpose | -|------|---------| -| `/root/.openclaw/extensions/memory-qdrant/` | Plugin code | -| `/root/.openclaw/openclaw.json` | Plugin configuration | -| `/etc/systemd/system/mem-qdrant-watcher.service` | Systemd service | - ---- - -## 🔥 CRITICAL FIXES APPLIED (2026-02-25 12:00-12:41 CST) - -### Issues Found & 
Fixed Today - -| Issue | Root Cause | Fix Applied | -|-------|------------|-------------| -| **Watcher stuck on old session** | Watcher only checked for new sessions when current file deleted | ✅ Restarted watcher, now follows current session (12:22) | -| **Plugin capture 0 exchanges** | OpenClaw changed to OpenAI content format (array), plugin expected string | ✅ Added `extractMessageText()` to parse content arrays (12:34) | -| **Session switching logic** | Old sessions persisted, watcher never switched | ✅ Fixed session detection logic in watcher | -| **Plugin content extraction** | `msg.content` is now array with `{type, text}` items | ✅ Extracts text from `type: "text"` items | - -### Validation Results (2026-02-25 12:41) - -``` -memory-qdrant: parsed 17 user, 116 assistant messages, 9 exchanges -memory-qdrant: first msg role=user, contentType=array -``` - -**Before:** 0 exchanges extracted -**After:** 9 exchanges captured per session - -### Components Status - -| Component | Before | After | Status | -|-----------|--------|-------|--------| -| Real-time watcher | Stuck on Feb 24 session | Following current session | ✅ Fixed | -| Plugin capture | 0 exchanges | 9 exchanges | ✅ Fixed | -| Context injection | Working | Still working | ✅ Verified | - -### Files Modified (2026-02-25) - -| File | Change | -|------|--------| -| `extensions/memory-qdrant/index.ts` | Added `extractMessageText()` function, removed debug logging | -| `extensions/memory-qdrant/index.js` | Compiled TypeScript changes | -| `session.md` | This update | -| `function_check.md` | Added fixes section | - ---- - -## Changes Made Today (2026-02-24 19:00) - -### 1. Timer Curator Deployed (v2.2) - -- Created `curator_timer.py` — simplified timer-based curation -- Created `curator_config.json` — adjustable settings -- Removed daily 2:45 AM cron job -- Added `*/30 * * * *` cron timer -- **Status:** ✅ Deployed, logs to `/var/log/true-recall-timer.log` - -### 2. 
Migration Completed - -- Created `migrate_add_curated.py` -- Tagged 12,378 existing memories with `curated: false` -- Updated watcher to add `curated: false` to new memories -- **Status:** ✅ Complete - -### 3. Simplified Architecture - -- ❌ Removed turn-based curator complexity -- ❌ Removed daily batch processing -- ✅ Single timer trigger every 30 minutes -- ✅ No minimum threshold (processes 0-N memories) - ---- - -## Configuration - -### memory-qdrant Plugin - -**File:** `/root/.openclaw/openclaw.json` - -```json -{ - "memory-qdrant": { - "config": { - "autoCapture": true, - "autoRecall": true, - "collectionName": "gems_tr", - "captureCollection": "memories_tr", - "embeddingModel": "snowflake-arctic-embed2", - "maxRecallResults": 2, - "minRecallScore": 0.7, - "ollamaUrl": "http://:11434", - "qdrantUrl": "http://:6333" - }, - "enabled": true - } -} -``` - -### Gateway (OpenClaw Update Fix) - -```json -{ - "gateway": { - "controlUi": { - "allowedOrigins": ["*"], - "allowInsecureAuth": false, - "dangerouslyDisableDeviceAuth": true - } - } -} -``` - ---- - -## Validation Commands - -### Check Collections - -```bash -# Points count -curl -s http://:6333/collections/memories_tr | jq '.result.points_count' -curl -s http://:6333/collections/gems_tr | jq '.result.points_count' - -# Recent points -curl -s -X POST http://:6333/collections/memories_tr/points/scroll \ - -H "Content-Type: application/json" \ - -d '{"limit": 5, "with_payload": true}' | jq '.result.points[].payload.content' -``` - -### Check Services - -```bash -# Watcher status -sudo systemctl status mem-qdrant-watcher - -# Watcher logs -sudo journalctl -u mem-qdrant-watcher -n 20 - -# OpenClaw status -openclaw status -``` - ---- - -## Troubleshooting - -### Issue: Watcher Not Capturing - -**Check:** -1. Service running? `systemctl status mem-qdrant-watcher` -2. Logs: `journalctl -u mem-qdrant-watcher -f` -3. Qdrant accessible? `curl http://:6333/` -4. Ollama accessible? 
`curl http://:11434/api/tags` - -### Issue: Cleaner Fails - -**Common causes:** -- Qdrant connection timeout (add `time.sleep(0.1)` between batches) -- Nested content dicts (handled in updated script) -- Type errors (non-string content — handled) - -### Issue: Plugin Not Loading - -**Check:** -1. `openclaw.json` syntax valid? `openclaw config validate` -2. Plugin compiled? `cd /root/.openclaw/extensions/memory-qdrant && npx tsc` -3. Gateway logs: `tail /tmp/openclaw/openclaw-$(date +%Y-%m-%d).log` - ---- - -## Cron Schedule (Updated v2.2) - -| Time | Job | Script | Status | -|------|-----|--------|--------| -| Every 30 min | Timer curator | `tr-continuous/curator_timer.py` | ✅ Active | -| Per turn | Capture | `mem-qdrant-watcher` | ✅ Daemon | -| Per turn | Injection | `memory-qdrant` plugin | ✅ Active | - -**Removed:** -- ❌ 2:45 AM daily curator -- ❌ Every-minute turn curator check - ---- - -## Next Steps - -### Immediate -- ⏳ Monitor first timer run (logs: `/var/log/true-recall-timer.log`) -- ⏳ Validate gem extraction quality from timer curator -- ⏳ Archive old curator scripts if timer works - -### Completed ✅ -- ✅ **Compactor config** — Minimal overhead: `mode: default`, `reserveTokensFloor: 0`, `memoryFlush: false` - -### Future -- ⏳ Curator tuning based on timer results -- ⏳ Silent compacting (requires OpenClaw core changes) - -### Planned Features (Backlog) -- ⏳ **Interactive install script** — Prompts for embedding model, timer interval, batch size, endpoints -- ⏳ **Single embedding model option** — Use one model for both collections -- ⏳ **Configurable thresholds** — Per-user customization via prompts - -**Compactor Settings (Applied):** -```json5 -{ - agents: { - defaults: { - compaction: { - mode: "default", - reserveTokensFloor: 0, - memoryFlush: { enabled: false } - } - } - } -} -``` - -**Note:** Only `mode`, `reserveTokensFloor`, and `memoryFlush` are valid under `agents.defaults.compaction`. Other settings are Pi runtime parameters. 
- -**Install script prompts:** -1. Embedding model (snowflake vs mxbai) -2. Timer interval (5 min / 30 min / hourly) -3. Batch size (50 / 100 / 500) -4. Qdrant/Ollama URLs -5. User ID - ---- - -## Session Recovery - -If starting fresh: -1. Read `README.md` for architecture overview -2. Check service status: `sudo systemctl status mem-qdrant-watcher` -3. Check timer curator: `tail /var/log/true-recall-timer.log` -4. Verify collections: `curl http://:6333/collections` - ---- - -*Last Verified: 2026-02-24 19:29 CST* -*Version: v2.2 (30b curator, install script planned)* diff --git a/tr-continuous/curator_timer.py b/tr-continuous/curator_timer.py index 24907af..3154ae2 100755 --- a/tr-continuous/curator_timer.py +++ b/tr-continuous/curator_timer.py @@ -32,7 +32,7 @@ SCRIPT_DIR = Path(__file__).parent DEFAULT_CONFIG = SCRIPT_DIR / "curator_config.json" # Curator prompt path -CURATOR_PROMPT_PATH = Path("/root/.openclaw/workspace/.local_projects/true-recall-v2/curator-prompt.md") +CURATOR_PROMPT_PATH = Path("~/.openclaw/workspace/.local_projects/true-recall-v2/curator-prompt.md").expanduser()  # Path() does not expand "~" on its own def load_curator_prompt() -> str: @@ -295,8 +295,8 @@ def main(): config = load_config(args.config) - qdrant_url = os.getenv("QDRANT_URL", "http://10.0.0.40:6333") - ollama_url = os.getenv("OLLAMA_URL", "http://10.0.0.10:11434") + qdrant_url = os.getenv("QDRANT_URL", "http://:6333") + ollama_url = os.getenv("OLLAMA_URL", "http://:11434") user_id = config.get("user_id", "rob") source_collection = config.get("source_collection", "memories_tr") diff --git a/tr-worker/mem-qdrant-watcher.service b/tr-worker/mem-qdrant-watcher.service new file mode 100644 index 0000000..85f25c2 --- /dev/null +++ b/tr-worker/mem-qdrant-watcher.service @@ -0,0 +1,19 @@ +[Unit] +Description=OpenClaw Real-Time Qdrant Memory Watcher +After=network.target + +[Service] +Type=simple +User= +WorkingDirectory=/tr-worker +Environment="QDRANT_URL=http://:6333" +Environment="QDRANT_COLLECTION=memories_tr" 
+Environment="OLLAMA_URL=http://:11434"
+Environment="EMBEDDING_MODEL=snowflake-arctic-embed2"
+Environment="USER_ID="
+ExecStart=/usr/bin/python3 /tr-worker/realtime_qdrant_watcher.py --daemon
+Restart=always
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tr-worker/realtime_qdrant_watcher.py b/tr-worker/realtime_qdrant_watcher.py
new file mode 100644
index 0000000..01e918e
--- /dev/null
+++ b/tr-worker/realtime_qdrant_watcher.py
@@ -0,0 +1,435 @@
+#!/usr/bin/env python3
+"""
+Real-time Qdrant Watcher: Monitors OpenClaw session JSONL and stores to Qdrant instantly.
+
+This daemon watches the active session file, embeds each conversation turn,
+and stores directly to Qdrant memories_tr collection (real-time, no Redis).
+
+Usage:
+    # Run as daemon
+    python3 realtime_qdrant_watcher.py --daemon
+
+    # Run once (process current session then exit)
+    python3 realtime_qdrant_watcher.py --once
+
+    # Test mode (print to stdout, don't write to Qdrant)
+    python3 realtime_qdrant_watcher.py --dry-run
+
+Systemd service:
+    # Copy to /etc/systemd/system/mem-qdrant-watcher.service
+    # systemctl enable --now mem-qdrant-watcher
+"""
+
+import os
+import re
+import sys
+import json
+import time
+import signal
+import hashlib
+import argparse
+import requests
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, Any, Iterator, Optional, List, TextIO
+
+# Config - Set via environment variables or use placeholders
+# NOTE(review): the URL defaults below carry no host, so QDRANT_URL and
+# OLLAMA_URL have to be supplied through the environment.
+QDRANT_URL = os.getenv("QDRANT_URL", "http://:6333")
+QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION", "memories_tr")
+OLLAMA_URL = os.getenv("OLLAMA_URL", "http://:11434")
+EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "snowflake-arctic-embed2")
+USER_ID = os.getenv("USER_ID", "")
+
+# Paths
+SESSIONS_DIR = Path(os.getenv("SESSIONS_DIR", "~/.openclaw/agents/main/sessions")).expanduser()
+
+# State
+running = True        # cleared by signal_handler to stop the watch loops
+last_position = 0     # read offset inside the session file being watched
+current_file = None   # session file selected by watch_loop
+turn_counter = 0      # number of turns parsed so far in the current session
+
+# Patterns used by clean_content(), compiled once at import time
+_METADATA_BLOCK_RE = re.compile(
+    r'Conversation info \(untrusted metadata\):\s*```json\s*\{[\s\S]*?\}\s*```'
+)
+_THINKING_TAG_RE = re.compile(r'\[thinking:[^\]]*\]')
+_TIMESTAMP_RE = re.compile(r'\[\w{3} \d{4}-\d{2}-\d{2} \d{2}:\d{2} [A-Z]{3}\]')
+_CODE_FENCE_RE = re.compile(r'```[\s\S]*?```')
+_TABLE_ROW_RE = re.compile(r'\|[^\n]*\|')
+_BOLD_RE = re.compile(r'\*\*([^*]+)\*\*')
+_ITALIC_RE = re.compile(r'\*([^*]+)\*')
+_INLINE_CODE_RE = re.compile(r'`([^`]+)`')
+_DASH_RULE_RE = re.compile(r'---+')
+_STAR_RULE_RE = re.compile(r'\*\*\*+')
+_BLANK_LINES_RE = re.compile(r'\n{3,}')
+_SPACE_RUN_RE = re.compile(r'[ \t]+')
+
+
+def signal_handler(signum, frame):
+    """Handle shutdown gracefully.
+
+    Installed for SIGINT and SIGTERM by main(); clears the module-level
+    ``running`` flag so the watch loops exit at their next check.
+    """
+    global running
+    print(f"\nReceived signal {signum}, shutting down...", file=sys.stderr)
+    running = False
+
+
+def get_embedding(text: str) -> Optional[List[float]]:
+    """Get embedding vector from Ollama.
+
+    Returns the ``embedding`` field of the ``/api/embeddings`` response, or
+    None (after logging to stderr) when the request fails or the response
+    does not have the expected shape.
+    """
+    try:
+        response = requests.post(
+            f"{OLLAMA_URL}/api/embeddings",
+            json={"model": EMBEDDING_MODEL, "prompt": text},
+            timeout=30
+        )
+        response.raise_for_status()
+        return response.json()["embedding"]
+    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
+        print(f"Error getting embedding: {e}", file=sys.stderr)
+        return None
+
+
+def clean_content(text: str) -> str:
+    """Clean content - remove metadata, markdown, keep only plain text."""
+    # Remove metadata JSON blocks
+    text = _METADATA_BLOCK_RE.sub('', text)
+
+    # Remove thinking tags
+    text = _THINKING_TAG_RE.sub('', text)
+
+    # Remove timestamp lines
+    text = _TIMESTAMP_RE.sub('', text)
+
+    # Remove fenced code blocks. This has to happen before inline code is
+    # unwrapped: the inline pattern also matches inside a ``` fence and
+    # would leave the block's contents behind.
+    text = _CODE_FENCE_RE.sub('', text)
+
+    # Remove markdown tables (separator rows are matched by the same pattern)
+    text = _TABLE_ROW_RE.sub('', text)
+
+    # Remove markdown formatting, keeping the wrapped text
+    text = _BOLD_RE.sub(r'\1', text)         # Bold **text**
+    text = _ITALIC_RE.sub(r'\1', text)       # Italic *text*
+    text = _INLINE_CODE_RE.sub(r'\1', text)  # Inline code `text`
+
+    # Remove horizontal rules
+    text = _DASH_RULE_RE.sub('', text)
+    text = _STAR_RULE_RE.sub('', text)
+
+    # Remove excess whitespace
+    text = _BLANK_LINES_RE.sub('\n', text)
+    text = _SPACE_RUN_RE.sub(' ', text)      # Multiple spaces -> single
+
+    return text.strip()
+
+
+def store_to_qdrant(turn: Dict[str, Any], dry_run: bool = False) -> bool:
+    """Store a single turn to Qdrant with embedding.
+
+    Returns True when the point was written (or would have been, in dry-run
+    mode) and False when embedding or the Qdrant request failed.
+    """
+    if dry_run:
+        print(f"[DRY RUN] Would store turn {turn['turn']} ({turn['role']}): {turn['content'][:60]}...")
+        return True
+
+    # Get embedding
+    vector = get_embedding(turn['content'])
+    if vector is None:
+        print(f"Failed to get embedding for turn {turn['turn']}", file=sys.stderr)
+        return False
+
+    user_id = turn.get('user_id', USER_ID)
+    timestamp = turn.get('timestamp', datetime.now(timezone.utc).isoformat())
+
+    # Prepare payload
+    payload = {
+        "user_id": user_id,
+        "role": turn['role'],
+        "content": turn['content'],
+        "turn": turn['turn'],
+        "timestamp": timestamp,
+        "date": datetime.now(timezone.utc).strftime('%Y-%m-%d'),
+        "source": "realtime_watcher",
+        "curated": False
+    }
+
+    # Generate deterministic ID from the turn's own identity (no wall-clock
+    # component), so storing the same turn again overwrites the same point.
+    id_source = f"{user_id}:{turn.get('session', '')}:turn:{turn['turn']}:{timestamp}"
+    hash_bytes = hashlib.sha256(id_source.encode()).digest()[:8]
+    point_id = int.from_bytes(hash_bytes, byteorder='big') % (2**63)
+
+    # Store to Qdrant
+    try:
+        response = requests.put(
+            f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points",
+            json={
+                "points": [{
+                    "id": point_id,
+                    "vector": vector,
+                    "payload": payload
+                }]
+            },
+            timeout=30
+        )
+        response.raise_for_status()
+        return True
+    except requests.RequestException as e:
+        print(f"Error writing to Qdrant: {e}", file=sys.stderr)
+        return False
+
+
+def get_current_session_file() -> Optional[Path]:
+    """Find the most recently modified session JSONL file."""
+    if not SESSIONS_DIR.exists():
+        return None
+
+    newest = None
+    newest_mtime = None
+    for path in SESSIONS_DIR.glob("*.jsonl"):
+        try:
+            mtime = path.stat().st_mtime
+        except OSError:
+            # File vanished between listing and stat; ignore it
+            continue
+        if newest_mtime is None or mtime > newest_mtime:
+            newest, newest_mtime = path, mtime
+
+    return newest
+
+
+def parse_turn(line: str, session_name: str) -> Optional[Dict[str, Any]]:
+    """Parse a single JSONL line into a turn dict.
+
+    Returns None for lines that are not valid JSON, are not user/assistant
+    messages, or have fewer than 5 characters of text after cleaning.
+    Increments the module-level ``turn_counter`` for every turn returned.
+    """
+    global turn_counter
+
+    try:
+        entry = json.loads(line.strip())
+    except json.JSONDecodeError:
+        return None
+
+    # OpenClaw format: {"type": "message", "message": {...}}
+    if not isinstance(entry, dict) or entry.get('type') != 'message':
+        return None
+
+    msg = entry.get('message')
+    if not isinstance(msg, dict):
+        return None
+
+    # Only user/assistant turns are kept (this also skips toolResult,
+    # system and developer messages)
+    role = msg.get('role')
+    if role not in ('user', 'assistant'):
+        return None
+
+    # Extract content: either a plain string or a list of {"text": ...} parts
+    raw_content = msg.get('content')
+    if isinstance(raw_content, list):
+        content = "".join(
+            item['text'] for item in raw_content
+            if isinstance(item, dict) and isinstance(item.get('text'), str)
+        )
+    elif isinstance(raw_content, str):
+        content = raw_content
+    else:
+        content = ""
+
+    if not content:
+        return None
+
+    # Clean content
+    content = clean_content(content)
+    if len(content) < 5:
+        return None
+
+    turn_counter += 1
+
+    return {
+        'turn': turn_counter,
+        'role': role,
+        'content': content[:2000],  # Limit size
+        'timestamp': entry.get('timestamp', datetime.now(timezone.utc).isoformat()),
+        'user_id': USER_ID,
+        'session': session_name
+    }
+
+
+def _is_complete_json(text: str) -> bool:
+    """Return True when text parses as a JSON document."""
+    try:
+        json.loads(text)
+    except json.JSONDecodeError:
+        return False
+    return True
+
+
+def _read_complete_lines(f: TextIO) -> Iterator[str]:
+    """Yield finished lines from f, stopping before a half-written last line.
+
+    A trailing line without a newline is only yielded when it already parses
+    as JSON; otherwise the file position is rewound to its start so it is
+    read again once the writer has completed it.
+    """
+    while True:
+        line_start = f.tell()
+        line = f.readline()
+        if not line:
+            return
+        if not line.endswith('\n') and not _is_complete_json(line):
+            f.seek(line_start)
+            return
+        yield line
+
+
+def process_new_lines(f, session_name: str, dry_run: bool = False):
+    """Process any new lines added to the file.
+
+    Reads from the module-level ``last_position`` and updates it to the
+    offset after the last complete line consumed.
+    """
+    global last_position
+
+    f.seek(last_position)
+
+    for line in _read_complete_lines(f):
+        turn = parse_turn(line, session_name)
+        if turn and store_to_qdrant(turn, dry_run):
+            print(f"✅ Turn {turn['turn']} ({turn['role']}) → Qdrant")
+
+    last_position = f.tell()
+
+
+def watch_session(session_file: Path, dry_run: bool = False, follow: bool = True):
+    """Watch a specific session file for new lines.
+
+    With follow=True (daemon mode) turns already in the file are counted but
+    not stored, then the file is polled until shutdown or until it is
+    removed. With follow=False the whole file is processed once from the
+    start and the function returns.
+
+    Returns session_file, or None when the file was removed or could not be
+    read (the caller should look for a session again).
+    """
+    global last_position, turn_counter
+
+    session_name = session_file.name.replace('.jsonl', '')
+    print(f"Watching session: {session_file.name}")
+
+    # Turn numbers and the read offset are always derived from this file
+    turn_counter = 0
+    last_position = 0
+
+    try:
+        with open(session_file, 'r', encoding='utf-8', errors='replace') as f:
+            if follow:
+                # Count existing turns with the same rules parse_turn applies
+                # to new ones, so numbering continues across restarts
+                for line in _read_complete_lines(f):
+                    parse_turn(line, session_name)
+                last_position = f.tell()
+                print(f"Session has {turn_counter} existing turns, starting from position {last_position}")
+
+            while True:
+                process_new_lines(f, session_name, dry_run)
+                if not follow or not running:
+                    break
+                if not session_file.exists():
+                    print("Session file removed, looking for new session...")
+                    return None
+                time.sleep(0.1)
+    except OSError as e:
+        print(f"Error reading session file: {e}", file=sys.stderr)
+        return None
+
+    return session_file
+
+
+def watch_loop(dry_run: bool = False):
+    """Main watch loop - picks the newest session and watches it.
+
+    NOTE(review): a different session is only picked up after watch_session()
+    returns, i.e. when the watched file is removed; a newer file appearing
+    next to it is not noticed while the old one still exists.
+    """
+    global current_file, turn_counter, last_position
+
+    while running:
+        session_file = get_current_session_file()
+
+        if session_file is None:
+            print("No active session found, waiting...")
+            time.sleep(1)
+            continue
+
+        if current_file != session_file:
+            print(f"\nNew session detected: {session_file.name}")
+            current_file = session_file
+            turn_counter = 0
+            last_position = 0
+
+        result = watch_session(session_file, dry_run)
+
+        if result is None:
+            current_file = None
+            time.sleep(0.5)
+
+
+def main():
+    """Parse command-line options and run the watcher."""
+    global USER_ID
+
+    parser = argparse.ArgumentParser(
+        description="Real-time OpenClaw session watcher → Qdrant"
+    )
+    parser.add_argument("--daemon", "-d", action="store_true", help="Run as daemon")
+    parser.add_argument("--once", "-o", action="store_true", help="Process once then exit")
+    parser.add_argument("--dry-run", "-n", action="store_true", help="Don't write to Qdrant")
+    parser.add_argument("--user-id", "-u", default=USER_ID, help=f"User ID (default: {USER_ID})")
+
+    args = parser.parse_args()
+
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    if args.user_id:
+        USER_ID = args.user_id
+
+    print("🔍 Real-time Qdrant Watcher")
+    print(f"📍 Qdrant: {QDRANT_URL}/{QDRANT_COLLECTION}")
+    print(f"🧠 Ollama: {OLLAMA_URL}/{EMBEDDING_MODEL}")
+    print(f"👤 User: {USER_ID}")
+    print(f"📝 Sessions: {SESSIONS_DIR}")
+    print()
+
+    if args.once:
+        print("Running once...")
+        session_file = get_current_session_file()
+        if session_file:
+            # follow=False: process the session from its start, then return
+            watch_session(session_file, args.dry_run, follow=False)
+        else:
+            print("No session found")
+    else:
+        # Daemon mode is the default; --daemon is accepted for clarity
+        print("Running as daemon (Ctrl+C to stop)...")
+        watch_loop(args.dry_run)
+
+
+if __name__ == "__main__":
+    main()