Files
jarvis-memory/skills/qdrant-memory/scripts/daily_conversation_backup.py

348 lines
11 KiB
Python
Raw Normal View History

2026-02-23 12:13:04 -06:00
#!/usr/bin/env python3
"""
Daily Conversation Backup - Store day's conversations to Qdrant (Mem0-style)
Reads the daily memory file and stores all conversation turns to Qdrant
as full context (Mem0-style) with persistent user_id. Run at 3:30am daily.
Usage:
daily_conversation_backup.py [YYYY-MM-DD]
# If no date provided, processes yesterday's log
Mem0-style: All conversations linked to persistent user_id.
"""
import argparse
import hashlib
import json
import os
import re
import sys
import urllib.request
import uuid
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any
# Qdrant vector database REST endpoint (stores the memory points).
QDRANT_URL = "http://10.0.0.40:6333"
# Target collection; all points carry a user_id payload for Mem0-style scoping.
COLLECTION_NAME = "kimi_memories"
# Ollama OpenAI-compatible API base (used for the /embeddings route).
OLLAMA_URL = "http://10.0.0.10:11434/v1"
# Directory containing the daily markdown logs, one file per day: YYYY-MM-DD.md
MEMORY_DIR = "/root/.openclaw/workspace/memory"
# DEFAULT USER - Mem0-style: memories belong to user
DEFAULT_USER_ID = "yourname"
def get_content_hash(user_msg: str, ai_response: str) -> str:
    """Return an MD5 hex digest of the stripped user/AI pair, used for dedup."""
    combined = "::".join((user_msg.strip(), ai_response.strip()))
    return hashlib.md5(combined.encode()).hexdigest()
def get_embedding(text: str) -> Optional[List[float]]:
    """Embed *text* via the snowflake-arctic-embed2 model on Ollama.

    Input is truncated to 8192 characters. Returns the embedding vector,
    or None if the request or response parsing fails.
    """
    body = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],
    }).encode()
    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            parsed = json.loads(resp.read().decode())
            return parsed["data"][0]["embedding"]
    except Exception as exc:
        # Any failure (network, HTTP error, unexpected schema) degrades to None.
        print(f"[DailyBackup] Embedding error: {exc}", file=sys.stderr)
        return None
def is_duplicate(user_id: str, content_hash: str) -> bool:
    """Return True if a point with this (user_id, content_hash) already exists.

    Uses Qdrant's scroll API with a payload filter. Best-effort: any error
    (network, bad response) is treated as "not a duplicate" so storage proceeds.
    """
    scroll_request = {
        "filter": {
            "must": [
                {"key": "user_id", "match": {"value": user_id}},
                {"key": "content_hash", "match": {"value": content_hash}},
            ]
        },
        "limit": 1,
        "with_payload": False,
    }
    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
        data=json.dumps(scroll_request).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            parsed = json.loads(resp.read().decode())
            matches = parsed.get("result", {}).get("points", [])
            return bool(matches)
    except Exception:
        # Deliberate swallow: a failed check must not block the backup.
        return False
def parse_daily_log(date_str: str) -> List[Dict[str, str]]:
    """Parse MEMORY_DIR/<date>.md into a list of conversation-turn dicts.

    Sections are delimited by "## " headers. Each dict has keys
    'user', 'ai', 'turn_number', 'date'. Returns [] if the file is missing.
    """
    path = os.path.join(MEMORY_DIR, f"{date_str}.md")
    if not os.path.exists(path):
        print(f"[DailyBackup] No log file found for {date_str}")
        return []

    with open(path, 'r') as fh:
        raw = fh.read()

    turns: List[Dict[str, str]] = []
    counter = 0

    # Split the file into sections at each "## [timestamp] ..." header.
    for section in re.split(r'\n##\s+', raw):
        stripped = section.strip()
        if not stripped:
            continue
        header, _, remainder = stripped.partition('\n')
        body = remainder.strip()

        # The user message is whatever follows the [timestamp] bracket,
        # falling back to the whole header line.
        user_hit = re.search(r'\[.*?\]\s*(.+)', header)
        user_msg = user_hit.group(1) if user_hit else header

        # Prefer an explicitly labelled assistant reply; otherwise treat
        # everything after the first paragraph (or the whole body) as the reply.
        ai_hit = re.search(
            r'(?:Kimi|Assistant|AI)[:\s]+(.+?)(?=\n##|\Z)',
            body, re.DOTALL | re.IGNORECASE,
        )
        if ai_hit:
            ai_response = ai_hit.group(1).strip()
        else:
            paragraphs = body.split('\n\n')
            ai_response = ('\n\n'.join(paragraphs[1:]).strip()
                           if len(paragraphs) > 1 else body)

        if user_msg and ai_response:
            counter += 1
            turns.append({
                'user': user_msg,
                'ai': ai_response,
                'turn_number': counter,
                'date': date_str,
            })
    return turns
def _conversation_point(
    vector: List[float],
    text: str,
    *,
    user_id: str,
    date_str: str,
    tags: List[str],
    importance: str,
    source: str,
    source_type: str,
    category: str,
    conversation_id: str,
    turn_number: int,
    content_hash: str,
    timestamp: str,
    extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build one Qdrant point dict with the shared conversation payload schema.

    All three points of a turn (user / AI / summary) share this schema and
    differ only in text, tags, source fields, and optional extra payload keys.
    """
    payload: Dict[str, Any] = {
        "user_id": user_id,
        "text": text,
        "date": date_str,
        "tags": tags,
        "importance": importance,
        "source": source,
        "source_type": source_type,
        "category": category,
        "confidence": "high",
        "verified": True,
        "created_at": timestamp,
        "access_count": 0,
        "last_accessed": timestamp,
        "conversation_id": conversation_id,
        "turn_number": turn_number,
        "content_hash": content_hash,
    }
    if extra:
        payload.update(extra)
    return {"id": str(uuid.uuid4()), "vector": vector, "payload": payload}


def store_conversation_turn(
    user_id: str,
    user_message: str,
    ai_response: str,
    conversation_id: str,
    turn_number: int,
    date_str: str
) -> bool:
    """Store a single conversation turn to Qdrant (Mem0-style).

    Writes three points: the user message, the AI response, and a combined
    summary. Returns True on success or when the turn is already stored
    (deduplicated via content hash); False if embedding or upsert fails.
    """
    content_hash = get_content_hash(user_message, ai_response)
    # Check duplicate
    if is_duplicate(user_id, content_hash):
        return True  # Already stored, skip silently

    # Generate embeddings for all three representations of the turn.
    user_embedding = get_embedding(user_message)
    ai_embedding = get_embedding(ai_response)
    summary = f"Q: {user_message[:200]}... A: {ai_response[:300]}..."
    summary_embedding = get_embedding(summary)
    if not all([user_embedding, ai_embedding, summary_embedding]):
        return False

    tags = ["conversation", "daily-backup", date_str, f"user:{user_id}"]
    importance = "high" if any(kw in (user_message + ai_response).lower()
                               for kw in ["remember", "important", "always", "never", "rule", "decision"]) else "medium"

    # One timestamp so all three points of this turn share identical
    # created_at / last_accessed values (previously each field got its own
    # datetime.now() call and the values drifted by microseconds).
    timestamp = datetime.now().isoformat()
    common = dict(
        user_id=user_id,
        date_str=date_str,
        importance=importance,
        conversation_id=conversation_id,
        turn_number=turn_number,
        content_hash=content_hash,
        timestamp=timestamp,
    )
    points = [
        # User message
        _conversation_point(
            user_embedding, f"[{user_id}]: {user_message}",
            tags=tags + ["user-message"],
            source="conversation_daily_backup", source_type="user",
            category="Full Conversation", **common,
        ),
        # AI response
        _conversation_point(
            ai_embedding, f"[Kimi]: {ai_response}",
            tags=tags + ["ai-response"],
            source="conversation_daily_backup", source_type="assistant",
            category="Full Conversation", **common,
        ),
        # Combined summary (also carries truncated raw texts for retrieval).
        _conversation_point(
            summary_embedding, f"[Turn {turn_number}] {summary}",
            tags=tags + ["summary", "combined"],
            source="conversation_summary", source_type="system",
            category="Conversation Summary",
            extra={
                "user_message": user_message[:500],
                "ai_response": ai_response[:800],
            },
            **common,
        ),
    ]

    # Upload to Qdrant (wait=true: block until the write is durable).
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps({"points": points}).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT"
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
            return result.get("status") == "ok"
    except Exception as e:
        print(f"[DailyBackup] Storage error: {e}", file=sys.stderr)
        return False
def main():
    """CLI entry point: resolve the target date, parse its log, and back up
    every conversation turn to Qdrant, reporting stored/skipped/failed counts."""
    parser = argparse.ArgumentParser(
        description="Daily conversation backup to Qdrant (Mem0-style)"
    )
    parser.add_argument(
        "date",
        nargs="?",
        help="Date to process (YYYY-MM-DD). Default: yesterday"
    )
    parser.add_argument(
        "--user-id",
        default=DEFAULT_USER_ID,
        help=f"User ID (default: {DEFAULT_USER_ID})"
    )
    args = parser.parse_args()

    # Default to yesterday's log when no date argument is given.
    date_str = args.date or (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
    user_id = args.user_id

    print(f"📅 Processing daily log for {date_str} (user: {user_id})...")
    conversations = parse_daily_log(date_str)
    if not conversations:
        print(f"⚠️ No conversations found for {date_str}")
        sys.exit(0)
    print(f"📝 Found {len(conversations)} conversation turns")

    n_stored = n_skipped = n_failed = 0
    for conv in conversations:
        turn = conv['turn_number']
        # Pre-check for duplicates so already-stored turns are reported.
        if is_duplicate(user_id, get_content_hash(conv['user'], conv['ai'])):
            n_skipped += 1
            print(f" ⏭️ Turn {turn} skipped (duplicate)")
            continue
        ok = store_conversation_turn(
            user_id=user_id,
            user_message=conv['user'],
            ai_response=conv['ai'],
            conversation_id=str(uuid.uuid4()),
            turn_number=turn,
            date_str=date_str
        )
        if ok:
            n_stored += 1
            print(f" ✅ Turn {turn} stored")
        else:
            n_failed += 1
            print(f" ❌ Turn {turn} failed")

    print(f"\n{'='*50}")
    print(f"Daily backup complete for {date_str} (user: {user_id}):")
    print(f" Stored: {n_stored} turns ({n_stored * 3} embeddings)")
    print(f" Skipped: {n_skipped} turns (duplicates)")
    print(f" Failed: {n_failed} turns")
    if n_stored > 0:
        print(f"\n✅ Daily backup: {n_stored} conversations stored to Qdrant")
    # Exit nonzero only when at least one turn failed to store.
    sys.exit(0 if n_failed == 0 else 1)
# Script entry point: intended to run nightly (e.g. 3:30am cron) per the
# module docstring; takes an optional YYYY-MM-DD argument.
if __name__ == "__main__":
    main()