Initial commit: TrueRecall v2.2 with 30b curator and timer-based curation
This commit is contained in:
187
migrate_memories.py
Normal file
187
migrate_memories.py
Normal file
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migrate memories from kimi_memories to memories_tr
|
||||
- Reads from kimi_memories (Qdrant)
|
||||
- Cleans/strips noise (metadata, thinking tags)
|
||||
- Stores to memories_tr (Qdrant)
|
||||
- Keeps original kimi_memories intact
|
||||
"""
|
||||
|
||||
import json
import re
import urllib.error
import urllib.request
from datetime import datetime
from typing import Any, Dict, List
# Qdrant HTTP API endpoint and the two collections involved in the migration.
QDRANT_URL = "http://10.0.0.40:6333"
SOURCE_COLLECTION = "kimi_memories"  # read-only source; left intact
TARGET_COLLECTION = "memories_tr"    # cleaned copies are written here
# Noise patterns, compiled once at import time. The original re-ran
# `import re` and re-resolved each pattern on every call; hoisting is the
# standard loop-invariant fix and the regexes themselves are unchanged.
_METADATA_RE = re.compile(
    r'Conversation info \(untrusted metadata\):\s*```json\s*\{[\s\S]*?\}\s*```'
)
_THINKING_RE = re.compile(r'\[thinking:[^\]]*\]')
_TIMESTAMP_RE = re.compile(r'\[\w{3} \d{4}-\d{2}-\d{2} \d{2}:\d{2} [A-Z]{3}\]')
_EXTRA_BLANKS_RE = re.compile(r'\n{3,}')


def clean_content(text: str) -> str:
    """Strip conversation noise from *text* and return the cleaned string.

    Removes, in order:
      - ``Conversation info (untrusted metadata)`` fenced-JSON blocks
      - ``[thinking: ...]`` tags
      - ``[Mon 2024-01-02 03:04 UTC]``-style timestamp markers

    Runs of three or more newlines are collapsed to a single blank line,
    and the result is stripped of surrounding whitespace. Empty or falsy
    input (including None) yields "".
    """
    if not text:
        return ""

    cleaned = _METADATA_RE.sub('', text)
    cleaned = _THINKING_RE.sub('', cleaned)
    cleaned = _TIMESTAMP_RE.sub('', cleaned)
    cleaned = _EXTRA_BLANKS_RE.sub('\n\n', cleaned)
    return cleaned.strip()
||||
def get_all_points(collection: str) -> List[Dict]:
    """Fetch every point in *collection* via Qdrant's scroll API.

    Pages through ``/points/scroll`` 100 points at a time (payload and
    vector included) until the server stops returning a
    ``next_page_offset``. Network errors are printed and end the scan
    early, returning whatever was fetched so far.
    """
    all_points: List[Dict] = []
    offset = None
    max_iterations = 1000  # hard cap: safety net against a misbehaving server
    iterations = 0

    while iterations < max_iterations:
        iterations += 1
        scroll_data: Dict[str, Any] = {
            "limit": 100,
            "with_payload": True,
            "with_vector": True,
        }
        # Offsets are point IDs, which may legitimately be falsy (e.g. 0),
        # so compare against None instead of using truthiness.
        if offset is not None:
            scroll_data["offset"] = offset

        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{collection}/points/scroll",
            data=json.dumps(scroll_data).encode(),
            headers={"Content-Type": "application/json"},
            method="POST",
        )

        try:
            with urllib.request.urlopen(req, timeout=60) as response:
                result = json.loads(response.read().decode())
        except urllib.error.URLError as e:  # URLError also covers HTTPError
            print(f"Error: {e}")
            break

        page = result.get("result", {})
        points = page.get("points", [])
        if not points:
            break
        all_points.extend(points)

        offset = page.get("next_page_offset")
        if offset is None:
            break

    return all_points
def store_points(collection: str, points: List[Dict]) -> int:
    """Upsert *points* into *collection* in batches of 100 via PUT.

    Returns the number of points acknowledged with HTTP 200. Failed
    batches are reported and skipped rather than aborting, so the return
    value may be less than ``len(points)``; the caller compares the two
    to detect a partial migration.
    """
    if not points:
        return 0

    batch_size = 100
    stored = 0

    for start in range(0, len(points), batch_size):
        batch = points[start:start + batch_size]

        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{collection}/points",
            data=json.dumps({"points": batch}).encode(),
            headers={"Content-Type": "application/json"},
            method="PUT",
        )

        try:
            with urllib.request.urlopen(req, timeout=60) as response:
                if response.status == 200:
                    stored += len(batch)
        except urllib.error.URLError as e:  # URLError also covers HTTPError
            print(f"Error storing batch: {e}")

    return stored
def migrate_point(point: Dict) -> Dict:
    """Return a copy of *point* with its text payload fields cleaned.

    The ``user_message`` and ``ai_response`` payload entries are run
    through ``clean_content``, and two provenance fields
    (``migrated_from``, ``migrated_at``) are stamped on. All other
    payload keys, the id, and the vector are carried over untouched.
    """
    original_payload = point.get("payload", {})

    new_payload = dict(original_payload)
    new_payload["user_message"] = clean_content(original_payload.get("user_message", ""))
    new_payload["ai_response"] = clean_content(original_payload.get("ai_response", ""))
    new_payload["migrated_from"] = "kimi_memories"
    new_payload["migrated_at"] = datetime.now().isoformat()

    return {
        "id": point.get("id"),
        "vector": point.get("vector"),
        "payload": new_payload,
    }
def main():
    """Drive the one-shot migration: fetch, clean, store, then verify."""
    rule = "=" * 60
    print(rule)
    print("Memory Migration: kimi_memories → memories_tr")
    print(rule)
    print()

    # Pull every point out of the source collection.
    print(f"📥 Reading from {SOURCE_COLLECTION}...")
    src = get_all_points(SOURCE_COLLECTION)
    print(f" Found {len(src)} points")

    if not src:
        print("❌ No points to migrate")
        return

    # Strip noise from each point's payload.
    print(f"\n🧹 Cleaning {len(src)} points...")
    cleaned = [migrate_point(p) for p in src]
    print(f" ✓ Cleaned")

    # Upload the cleaned copies.
    print(f"\n💾 Storing to {TARGET_COLLECTION}...")
    n_stored = store_points(TARGET_COLLECTION, cleaned)
    print(f" ✓ Stored {n_stored} points")

    # Re-read the target so the summary reflects what actually landed.
    print(f"\n🔍 Verifying...")
    in_target = get_all_points(TARGET_COLLECTION)
    print(f" Target now has {len(in_target)} points")

    print()
    print(rule)
    print("Migration Summary:")
    print(f" Source ({SOURCE_COLLECTION}): {len(src)} points")
    print(f" Target ({TARGET_COLLECTION}): {len(in_target)} points")
    print(f" Cleaned & migrated: {n_stored} points")
    print(rule)

    if n_stored == len(src):
        print("\n✅ Migration complete!")
    else:
        print(f"\n⚠️ Warning: Only migrated {n_stored}/{len(src)} points")


if __name__ == "__main__":
    main()
Reference in New Issue
Block a user