Initial commit: Jarvis Memory system
298
skills/qdrant-memory/scripts/bulk_migrate.py
Executable file
@@ -0,0 +1,298 @@
#!/usr/bin/env python3
"""
Bulk memory migration to Qdrant kimi_memories collection
Uses snowflake-arctic-embed2 (1024 dimensions)
"""

import json
import os
import re
import sys
import urllib.request
import uuid
from datetime import datetime

QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "kimi_memories"
OLLAMA_URL = "http://localhost:11434/v1"

MEMORY_DIR = "/root/.openclaw/workspace/memory"
MEMORY_MD = "/root/.openclaw/workspace/MEMORY.md"
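
# This script assumes the kimi_memories collection already exists with
# 1024-dim vectors. A minimal creation sketch against Qdrant's REST API
# (PUT /collections/{name}); the Cosine distance metric is an assumption:
#
#   body = json.dumps({"vectors": {"size": 1024, "distance": "Cosine"}}).encode()
#   req = urllib.request.Request(
#       f"{QDRANT_URL}/collections/{COLLECTION_NAME}", data=body, method="PUT",
#       headers={"Content-Type": "application/json"})
#   urllib.request.urlopen(req, timeout=30)
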
def get_embedding(text):
    """Generate embedding using snowflake-arctic-embed2 via Ollama"""
    data = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192]  # Limit text length
    }).encode()

    # OLLAMA_URL points at Ollama's OpenAI-compatible API, so the response
    # is shaped like {"data": [{"embedding": [...]}]}.
    req = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=data,
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(req, timeout=60) as response:
            result = json.loads(response.read().decode())
            return result["data"][0]["embedding"]
    except Exception as e:
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None
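
# Quick sanity check (assumes the collection's vector size is 1024, matching
# the embedding model above):
#
#   vec = get_embedding("hello")
#   assert vec is not None and len(vec) == 1024
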
def store_memory(text, embedding, tags=None, importance="medium", date=None,
                 source="memory_backup", confidence="high", source_type="user",
                 verified=True):
    """Store memory in Qdrant with metadata"""

    if date is None:
        date = datetime.now().strftime("%Y-%m-%d")

    point_id = str(uuid.uuid4())

    payload = {
        "text": text,
        "date": date,
        "tags": tags or [],
        "importance": importance,
        "confidence": confidence,
        "source_type": source_type,
        "verified": verified,
        "source": source,
        "created_at": datetime.now().isoformat(),
        "access_count": 0
    }

    point = {
        "id": point_id,
        "vector": embedding,
        "payload": payload
    }

    data = json.dumps({"points": [point]}).encode()
    # Qdrant's upsert endpoint is PUT; urllib defaults to POST when data is
    # set, so the method must be given explicitly.
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=data,
        method="PUT",
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
            # Qdrant wraps responses as {"status": "ok", "result": {...}};
            # the top-level status field signals success.
            return result.get("status") == "ok"
    except Exception as e:
        print(f"Error storing memory: {e}", file=sys.stderr)
        return False
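
# Reading memories back is the mirror-image call. A minimal semantic-search
# sketch (POST .../points/search is Qdrant's REST search endpoint; the
# function name and limit are illustrative, not part of this script):
#
#   def search_memories(query, limit=5):
#       vec = get_embedding(query)
#       body = json.dumps({"vector": vec, "limit": limit,
#                          "with_payload": True}).encode()
#       req = urllib.request.Request(
#           f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/search",
#           data=body, headers={"Content-Type": "application/json"})
#       with urllib.request.urlopen(req, timeout=30) as r:
#           return json.loads(r.read().decode())["result"]
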
def extract_memories_from_file(filepath, importance="medium"):
    """Extract memory entries from a markdown file"""
    memories = []

    try:
        with open(filepath, 'r') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading {filepath}: {e}", file=sys.stderr)
        return memories

    # Extract date from filename or content
    date_match = re.search(r'(\d{4}-\d{2}-\d{2})', filepath)
    date = date_match.group(1) if date_match else datetime.now().strftime("%Y-%m-%d")

    # Parse sections
    lines = content.split('\n')
    current_section = None
    current_content = []

    def flush_section():
        """Save the accumulated section as a memory if it has enough text."""
        if current_section and current_content:
            section_text = '\n'.join(current_content).strip()
            if len(section_text) > 20:
                memories.append({
                    "text": f"{current_section}: {section_text}",
                    "date": date,
                    "tags": extract_tags(current_section, section_text),
                    "importance": importance
                })

    for line in lines:
        # Section headers
        if line.startswith('# ') and 'Memory' in line:
            continue  # Skip title
        elif line.startswith('## '):
            flush_section()
            current_section = line[3:].strip()
            current_content = []
        elif line.startswith('### '):
            flush_section()
            current_section = line[4:].strip()
            current_content = []
        else:
            if current_section:
                current_content.append(line)

    # Save final section
    flush_section()

    return memories
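
# Example: extract_tags("Voice Settings", "tts config") returns
# ['voice', 'audio'] (order varies, since duplicates are dropped via set()).
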
def extract_tags(section, content):
    """Extract relevant tags from section and content"""
    tags = []
    section_lower = section.lower()

    # Section-based tags
    if any(word in section_lower for word in ['voice', 'tts', 'stt', 'audio']):
        tags.extend(['voice', 'audio'])
    if any(word in section_lower for word in ['memory', 'qdrant', 'remember']):
        tags.extend(['memory', 'qdrant'])
    if any(word in section_lower for word in ['redis', 'agent', 'message', 'max']):
        tags.extend(['redis', 'messaging', 'agent'])
    if any(word in section_lower for word in ['youtube', 'seo', 'content']):
        tags.extend(['youtube', 'content'])
    if any(word in section_lower for word in ['search', 'searxng', 'web']):
        tags.extend(['search', 'web'])
    if any(word in section_lower for word in ['setup', 'install', 'bootstrap']):
        tags.extend(['setup', 'configuration'])

    # Content-based tags
    content_lower = content.lower()
    if 'voice' in content_lower:
        tags.append('voice')
    if 'memory' in content_lower:
        tags.append('memory')
    if 'qdrant' in content_lower:
        tags.append('qdrant')
    if 'redis' in content_lower:
        tags.append('redis')
    if 'youtube' in content_lower:
        tags.append('youtube')
    if 'rob' in content_lower:
        tags.append('user')

    return list(set(tags))  # Remove duplicates
def extract_core_memories_from_memory_md():
    """Extract high-importance memories from MEMORY.md"""
    memories = []

    try:
        with open(MEMORY_MD, 'r') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading MEMORY.md: {e}", file=sys.stderr)
        return memories

    # Core sections with high importance
    sections = [
        ("Identity & Names", "high"),
        ("Core Preferences", "high"),
        ("Communication Rules", "high"),
        ("Voice Settings", "high"),
        ("Lessons Learned", "high"),
    ]

    for section_name, importance in sections:
        pattern = f"## {section_name}.*?(?=## |$)"
        match = re.search(pattern, content, re.DOTALL)
        if match:
            section_text = match.group(0).strip()
            # Extract subsections
            subsections = re.findall(r'### (.+?)\n', section_text)
            for sub in subsections:
                sub_pattern = f"### {re.escape(sub)}.*?(?=### |## |$)"
                sub_match = re.search(sub_pattern, section_text, re.DOTALL)
                if sub_match:
                    sub_text = sub_match.group(0).strip()
                    if len(sub_text) > 50:
                        memories.append({
                            "text": f"{section_name} - {sub}: {sub_text[:500]}",
                            "date": "2026-02-10",
                            "tags": extract_tags(section_name, sub_text) + ['core', 'longterm'],
                            "importance": importance
                        })

    return memories
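
# Note: store_memory() assigns a fresh uuid4 per point, so re-running this
# migration inserts duplicate memories. Deterministic IDs would make it
# idempotent; a sketch (uuid5 over the memory text; illustrative, not used
# above):
#
#   point_id = str(uuid.uuid5(uuid.NAMESPACE_URL, memory['text']))
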
def main():
    print("Starting bulk memory migration to kimi_memories...")
    print(f"Collection: {COLLECTION_NAME}")
    print("Model: snowflake-arctic-embed2 (1024 dims)")
    print()

    all_memories = []

    # Extract from daily logs
    for filename in sorted(os.listdir(MEMORY_DIR)):
        if filename.endswith('.md') and filename.startswith('2026'):
            filepath = os.path.join(MEMORY_DIR, filename)
            print(f"Processing {filename}...")
            memories = extract_memories_from_file(filepath, importance="medium")
            all_memories.extend(memories)
            print(f"  Extracted {len(memories)} memories")

    # Extract from MEMORY.md
    print("Processing MEMORY.md...")
    core_memories = extract_core_memories_from_memory_md()
    all_memories.extend(core_memories)
    print(f"  Extracted {len(core_memories)} core memories")

    print(f"\nTotal memories to store: {len(all_memories)}")
    print()

    # Store each memory
    success_count = 0
    fail_count = 0

    for i, memory in enumerate(all_memories, 1):
        print(f"[{i}/{len(all_memories)}] Storing: {memory['text'][:60]}...")

        # Generate embedding
        embedding = get_embedding(memory['text'])
        if embedding is None:
            print("  ❌ Failed to generate embedding")
            fail_count += 1
            continue

        # Store in Qdrant
        if store_memory(
            text=memory['text'],
            embedding=embedding,
            tags=memory['tags'],
            importance=memory['importance'],
            date=memory['date'],
            source="bulk_migration",
            confidence="high",
            source_type="user",
            verified=True
        ):
            print("  ✅ Stored")
            success_count += 1
        else:
            print("  ❌ Failed to store")
            fail_count += 1

    print()
    print("=" * 50)
    print("Migration complete!")
    print(f"  Success: {success_count}")
    print(f"  Failed: {fail_count}")
    print(f"  Total: {len(all_memories)}")
    print("=" * 50)


if __name__ == "__main__":
    main()