Initial commit: Jarvis Memory system
298
skills/qdrant-memory/scripts/bulk_migrate.py
Executable file
@@ -0,0 +1,298 @@
#!/usr/bin/env python3
"""
Bulk memory migration to Qdrant kimi_memories collection
Uses snowflake-arctic-embed2 (1024 dimensions)
"""

import json
import os
import re
import sys
import urllib.request
import uuid
from datetime import datetime

QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "kimi_memories"
OLLAMA_URL = "http://localhost:11434/v1"

MEMORY_DIR = "/root/.openclaw/workspace/memory"
MEMORY_MD = "/root/.openclaw/workspace/MEMORY.md"
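
# This script assumes the kimi_memories collection already exists with
# 1024-dim vectors. A minimal creation sketch against Qdrant's REST API
# (PUT /collections/{name}); the Cosine distance metric is an assumption:
#
#   body = json.dumps({"vectors": {"size": 1024, "distance": "Cosine"}}).encode()
#   req = urllib.request.Request(
#       f"{QDRANT_URL}/collections/{COLLECTION_NAME}", data=body, method="PUT",
#       headers={"Content-Type": "application/json"})
#   urllib.request.urlopen(req, timeout=30)
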
def get_embedding(text):
    """Generate embedding using snowflake-arctic-embed2 via Ollama"""
    data = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192]  # Limit text length
    }).encode()

    # OLLAMA_URL points at Ollama's OpenAI-compatible API, so the response
    # is shaped like {"data": [{"embedding": [...]}]}.
    req = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=data,
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(req, timeout=60) as response:
            result = json.loads(response.read().decode())
            return result["data"][0]["embedding"]
    except Exception as e:
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None
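
# Quick sanity check (assumes the collection's vector size is 1024, matching
# the embedding model above):
#
#   vec = get_embedding("hello")
#   assert vec is not None and len(vec) == 1024
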
def store_memory(text, embedding, tags=None, importance="medium", date=None,
                 source="memory_backup", confidence="high", source_type="user",
                 verified=True):
    """Store memory in Qdrant with metadata"""

    if date is None:
        date = datetime.now().strftime("%Y-%m-%d")

    point_id = str(uuid.uuid4())

    payload = {
        "text": text,
        "date": date,
        "tags": tags or [],
        "importance": importance,
        "confidence": confidence,
        "source_type": source_type,
        "verified": verified,
        "source": source,
        "created_at": datetime.now().isoformat(),
        "access_count": 0
    }

    point = {
        "id": point_id,
        "vector": embedding,
        "payload": payload
    }

    data = json.dumps({"points": [point]}).encode()
    # Qdrant's upsert endpoint is PUT; urllib defaults to POST when data is
    # set, so the method must be given explicitly.
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=data,
        method="PUT",
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
            # Qdrant wraps responses as {"status": "ok", "result": {...}};
            # the top-level status field signals success.
            return result.get("status") == "ok"
    except Exception as e:
        print(f"Error storing memory: {e}", file=sys.stderr)
        return False
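
# Reading memories back is the mirror-image call. A minimal semantic-search
# sketch (POST .../points/search is Qdrant's REST search endpoint; the
# function name and limit are illustrative, not part of this script):
#
#   def search_memories(query, limit=5):
#       vec = get_embedding(query)
#       body = json.dumps({"vector": vec, "limit": limit,
#                          "with_payload": True}).encode()
#       req = urllib.request.Request(
#           f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/search",
#           data=body, headers={"Content-Type": "application/json"})
#       with urllib.request.urlopen(req, timeout=30) as r:
#           return json.loads(r.read().decode())["result"]
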
def extract_memories_from_file(filepath, importance="medium"):
    """Extract memory entries from a markdown file"""
    memories = []

    try:
        with open(filepath, 'r') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading {filepath}: {e}", file=sys.stderr)
        return memories

    # Extract date from filename or content
    date_match = re.search(r'(\d{4}-\d{2}-\d{2})', filepath)
    date = date_match.group(1) if date_match else datetime.now().strftime("%Y-%m-%d")

    # Parse sections
    lines = content.split('\n')
    current_section = None
    current_content = []

    def flush_section():
        """Save the accumulated section as a memory if it has enough text."""
        if current_section and current_content:
            section_text = '\n'.join(current_content).strip()
            if len(section_text) > 20:
                memories.append({
                    "text": f"{current_section}: {section_text}",
                    "date": date,
                    "tags": extract_tags(current_section, section_text),
                    "importance": importance
                })

    for line in lines:
        # Section headers
        if line.startswith('# ') and 'Memory' in line:
            continue  # Skip title
        elif line.startswith('## '):
            flush_section()
            current_section = line[3:].strip()
            current_content = []
        elif line.startswith('### '):
            flush_section()
            current_section = line[4:].strip()
            current_content = []
        else:
            if current_section:
                current_content.append(line)

    # Save final section
    flush_section()

    return memories
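
# Example: extract_tags("Voice Settings", "tts config") returns
# ['voice', 'audio'] (order varies, since duplicates are dropped via set()).
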
def extract_tags(section, content):
    """Extract relevant tags from section and content"""
    tags = []
    section_lower = section.lower()

    # Section-based tags
    if any(word in section_lower for word in ['voice', 'tts', 'stt', 'audio']):
        tags.extend(['voice', 'audio'])
    if any(word in section_lower for word in ['memory', 'qdrant', 'remember']):
        tags.extend(['memory', 'qdrant'])
    if any(word in section_lower for word in ['redis', 'agent', 'message', 'max']):
        tags.extend(['redis', 'messaging', 'agent'])
    if any(word in section_lower for word in ['youtube', 'seo', 'content']):
        tags.extend(['youtube', 'content'])
    if any(word in section_lower for word in ['search', 'searxng', 'web']):
        tags.extend(['search', 'web'])
    if any(word in section_lower for word in ['setup', 'install', 'bootstrap']):
        tags.extend(['setup', 'configuration'])

    # Content-based tags
    content_lower = content.lower()
    if 'voice' in content_lower:
        tags.append('voice')
    if 'memory' in content_lower:
        tags.append('memory')
    if 'qdrant' in content_lower:
        tags.append('qdrant')
    if 'redis' in content_lower:
        tags.append('redis')
    if 'youtube' in content_lower:
        tags.append('youtube')
    if 'rob' in content_lower:
        tags.append('user')

    return list(set(tags))  # Remove duplicates
def extract_core_memories_from_memory_md():
    """Extract high-importance memories from MEMORY.md"""
    memories = []

    try:
        with open(MEMORY_MD, 'r') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading MEMORY.md: {e}", file=sys.stderr)
        return memories

    # Core sections with high importance
    sections = [
        ("Identity & Names", "high"),
        ("Core Preferences", "high"),
        ("Communication Rules", "high"),
        ("Voice Settings", "high"),
        ("Lessons Learned", "high"),
    ]

    for section_name, importance in sections:
        pattern = f"## {section_name}.*?(?=## |$)"
        match = re.search(pattern, content, re.DOTALL)
        if match:
            section_text = match.group(0).strip()
            # Extract subsections
            subsections = re.findall(r'### (.+?)\n', section_text)
            for sub in subsections:
                sub_pattern = f"### {re.escape(sub)}.*?(?=### |## |$)"
                sub_match = re.search(sub_pattern, section_text, re.DOTALL)
                if sub_match:
                    sub_text = sub_match.group(0).strip()
                    if len(sub_text) > 50:
                        memories.append({
                            "text": f"{section_name} - {sub}: {sub_text[:500]}",
                            "date": "2026-02-10",
                            "tags": extract_tags(section_name, sub_text) + ['core', 'longterm'],
                            "importance": importance
                        })

    return memories
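
# Note: store_memory() assigns a fresh uuid4 per point, so re-running this
# migration inserts duplicate memories. Deterministic IDs would make it
# idempotent; a sketch (uuid5 over the memory text; illustrative, not used
# above):
#
#   point_id = str(uuid.uuid5(uuid.NAMESPACE_URL, memory['text']))
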
def main():
    print("Starting bulk memory migration to kimi_memories...")
    print(f"Collection: {COLLECTION_NAME}")
    print("Model: snowflake-arctic-embed2 (1024 dims)")
    print()

    all_memories = []

    # Extract from daily logs
    for filename in sorted(os.listdir(MEMORY_DIR)):
        if filename.endswith('.md') and filename.startswith('2026'):
            filepath = os.path.join(MEMORY_DIR, filename)
            print(f"Processing {filename}...")
            memories = extract_memories_from_file(filepath, importance="medium")
            all_memories.extend(memories)
            print(f"  Extracted {len(memories)} memories")

    # Extract from MEMORY.md
    print("Processing MEMORY.md...")
    core_memories = extract_core_memories_from_memory_md()
    all_memories.extend(core_memories)
    print(f"  Extracted {len(core_memories)} core memories")

    print(f"\nTotal memories to store: {len(all_memories)}")
    print()

    # Store each memory
    success_count = 0
    fail_count = 0

    for i, memory in enumerate(all_memories, 1):
        print(f"[{i}/{len(all_memories)}] Storing: {memory['text'][:60]}...")

        # Generate embedding
        embedding = get_embedding(memory['text'])
        if embedding is None:
            print("  ❌ Failed to generate embedding")
            fail_count += 1
            continue

        # Store in Qdrant
        if store_memory(
            text=memory['text'],
            embedding=embedding,
            tags=memory['tags'],
            importance=memory['importance'],
            date=memory['date'],
            source="bulk_migration",
            confidence="high",
            source_type="user",
            verified=True
        ):
            print("  ✅ Stored")
            success_count += 1
        else:
            print("  ❌ Failed to store")
            fail_count += 1

    print()
    print("=" * 50)
    print("Migration complete!")
    print(f"  Success: {success_count}")
    print(f"  Failed: {fail_count}")
    print(f"  Total: {len(all_memories)}")
    print("=" * 50)


if __name__ == "__main__":
    main()