forked from SpeedyFoxAi/jarvis-memory

#!/usr/bin/env python3
"""
Daily Conversation Backup - Store the day's conversations to Qdrant (Mem0-style)

Reads the daily memory file and stores all conversation turns to Qdrant
as full context (Mem0-style) with a persistent user_id. Run at 3:30am daily.

Usage:
    daily_conversation_backup.py [YYYY-MM-DD]
    # If no date is provided, yesterday's log is processed

Mem0-style: all conversations are linked to a persistent user_id.
"""
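
# The "3:30am daily" schedule in the docstring is not set up by this file itself;
# a plausible crontab entry would be (the install path below is an assumption):
#   30 3 * * * /usr/bin/python3 /path/to/daily_conversation_backup.py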

import argparse
import hashlib
import json
import os
import re
import sys
import urllib.request
import uuid
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any

QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "kimi_memories"
OLLAMA_URL = "http://10.0.0.10:11434/v1"
MEMORY_DIR = "/root/.openclaw/workspace/memory"

# DEFAULT USER - Mem0-style: memories belong to the user
DEFAULT_USER_ID = "yourname"
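
# Optional helper (a minimal sketch, not called anywhere in this script): create
# the collection if it does not exist yet. Assumes snowflake-arctic-embed2's
# 1024-dimensional output and cosine distance; adjust "size" if your model differs.
def ensure_collection() -> None:
    body = json.dumps({"vectors": {"size": 1024, "distance": "Cosine"}}).encode()
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}",
        data=body,
        headers={"Content-Type": "application/json"},
        method="PUT"
    )
    try:
        urllib.request.urlopen(req, timeout=10).close()
    except Exception:
        pass  # most likely the collection already exists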

def get_content_hash(user_msg: str, ai_response: str) -> str:
    """Generate a hash for deduplication (md5 as a fingerprint, not for security)."""
    content = f"{user_msg.strip()}::{ai_response.strip()}"
    return hashlib.md5(content.encode()).hexdigest()

def get_embedding(text: str) -> Optional[List[float]]:
    """Generate an embedding using snowflake-arctic-embed2 via Ollama's OpenAI-compatible API."""
    data = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192]  # truncate to keep requests bounded
    }).encode()

    req = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=data,
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
            return result["data"][0]["embedding"]
    except Exception as e:
        print(f"[DailyBackup] Embedding error: {e}", file=sys.stderr)
        return None
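
# For reference, the OpenAI-compatible response parsed above looks roughly like
# (shape inferred from the parsing code, not taken from Ollama's docs):
#   {"data": [{"embedding": [0.013, -0.042, ...], "index": 0}], "model": "snowflake-arctic-embed2"}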

def is_duplicate(user_id: str, content_hash: str) -> bool:
    """Check whether this turn is already stored for this user."""
    try:
        search_body = {
            "filter": {
                "must": [
                    {"key": "user_id", "match": {"value": user_id}},
                    {"key": "content_hash", "match": {"value": content_hash}}
                ]
            },
            "limit": 1,
            "with_payload": False
        }

        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
            data=json.dumps(search_body).encode(),
            headers={"Content-Type": "application/json"}
        )

        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            points = result.get("result", {}).get("points", [])
            return len(points) > 0
    except Exception:
        pass  # on any error, fall through and treat the turn as not yet stored
    return False
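
# Note: Qdrant evaluates the scroll filter above even without payload indexes,
# but on a large collection it is worth indexing "user_id" and "content_hash"
# (PUT /collections/<name>/index with {"field_name": ..., "field_schema": "keyword"})
# so the duplicate check stays fast.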

def parse_daily_log(date_str: str) -> List[Dict[str, Any]]:
    """Parse the daily memory file into conversation turns."""
    log_file = os.path.join(MEMORY_DIR, f"{date_str}.md")

    if not os.path.exists(log_file):
        print(f"[DailyBackup] No log file found for {date_str}")
        return []

    with open(log_file, 'r') as f:
        content = f.read()

    conversations = []
    turn_number = 0

    # Split by headers (## [timestamp] ...)
    sections = re.split(r'\n##\s+', content)

    for section in sections:
        if not section.strip():
            continue

        lines = section.strip().split('\n')
        if not lines:
            continue

        header = lines[0]
        body = '\n'.join(lines[1:]).strip()

        # Extract the user message from the header
        user_match = re.search(r'\[.*?\]\s*(.+)', header)
        if user_match:
            user_msg = user_match.group(1)
        else:
            user_msg = header

        # Extract the AI response; fall back to everything after the first paragraph
        ai_match = re.search(r'(?:Kimi|Assistant|AI)[:\s]+(.+?)(?=\n##|\Z)', body, re.DOTALL | re.IGNORECASE)
        if ai_match:
            ai_response = ai_match.group(1).strip()
        else:
            paragraphs = body.split('\n\n')
            if len(paragraphs) > 1:
                ai_response = '\n\n'.join(paragraphs[1:]).strip()
            else:
                ai_response = body

        if user_msg and ai_response:
            turn_number += 1
            conversations.append({
                'user': user_msg,
                'ai': ai_response,
                'turn_number': turn_number,
                'date': date_str
            })

    return conversations
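
# Illustrative (assumed) shape of a daily log that parse_daily_log() handles:
#
#   ## [09:14] what's on my calendar today?
#   Kimi: You have two meetings before noon...
#
#   ## [10:02] remember that I prefer tea over coffee
#   Kimi: Noted. Tea it is.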

def store_conversation_turn(
    user_id: str,
    user_message: str,
    ai_response: str,
    conversation_id: str,
    turn_number: int,
    date_str: str
) -> bool:
    """Store a single conversation turn to Qdrant (Mem0-style).

    Each turn is written as three points: the user message, the AI response,
    and a combined Q/A summary, so either side of the exchange can be found
    by similarity search.
    """
    content_hash = get_content_hash(user_message, ai_response)

    # Check duplicate
    if is_duplicate(user_id, content_hash):
        return True  # Already stored, skip silently

    # Generate embeddings
    user_embedding = get_embedding(user_message)
    ai_embedding = get_embedding(ai_response)
    summary = f"Q: {user_message[:200]}... A: {ai_response[:300]}..."
    summary_embedding = get_embedding(summary)

    if not all([user_embedding, ai_embedding, summary_embedding]):
        return False

    tags = ["conversation", "daily-backup", date_str, f"user:{user_id}"]
    importance = "high" if any(kw in (user_message + ai_response).lower()
                               for kw in ["remember", "important", "always", "never", "rule", "decision"]) else "medium"

    points = []

    # User message
    user_point_id = str(uuid.uuid4())
    points.append({
        "id": user_point_id,
        "vector": user_embedding,
        "payload": {
            "user_id": user_id,
            "text": f"[{user_id}]: {user_message}",
            "date": date_str,
            "tags": tags + ["user-message"],
            "importance": importance,
            "source": "conversation_daily_backup",
            "source_type": "user",
            "category": "Full Conversation",
            "confidence": "high",
            "verified": True,
            "created_at": datetime.now().isoformat(),
            "access_count": 0,
            "last_accessed": datetime.now().isoformat(),
            "conversation_id": conversation_id,
            "turn_number": turn_number,
            "content_hash": content_hash
        }
    })

    # AI response
    ai_id = str(uuid.uuid4())
    points.append({
        "id": ai_id,
        "vector": ai_embedding,
        "payload": {
            "user_id": user_id,
            "text": f"[Kimi]: {ai_response}",
            "date": date_str,
            "tags": tags + ["ai-response"],
            "importance": importance,
            "source": "conversation_daily_backup",
            "source_type": "assistant",
            "category": "Full Conversation",
            "confidence": "high",
            "verified": True,
            "created_at": datetime.now().isoformat(),
            "access_count": 0,
            "last_accessed": datetime.now().isoformat(),
            "conversation_id": conversation_id,
            "turn_number": turn_number,
            "content_hash": content_hash
        }
    })

    # Summary
    summary_id = str(uuid.uuid4())
    points.append({
        "id": summary_id,
        "vector": summary_embedding,
        "payload": {
            "user_id": user_id,
            "text": f"[Turn {turn_number}] {summary}",
            "date": date_str,
            "tags": tags + ["summary", "combined"],
            "importance": importance,
            "source": "conversation_summary",
            "source_type": "system",
            "category": "Conversation Summary",
            "confidence": "high",
            "verified": True,
            "created_at": datetime.now().isoformat(),
            "access_count": 0,
            "last_accessed": datetime.now().isoformat(),
            "conversation_id": conversation_id,
            "turn_number": turn_number,
            "content_hash": content_hash,
            "user_message": user_message[:500],
            "ai_response": ai_response[:800]
        }
    })

    # Upload to Qdrant (?wait=true blocks until the write is applied)
    upsert_data = {"points": points}

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps(upsert_data).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT"
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
            return result.get("status") == "ok"
    except Exception as e:
        print(f"[DailyBackup] Storage error: {e}", file=sys.stderr)
        return False
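
# Hedged sketch of the retrieval side (not part of this script): embed a query
# with get_embedding(), then POST it to Qdrant's search endpoint, filtering by
# the same persistent user_id the points were stored with:
#   POST {QDRANT_URL}/collections/{COLLECTION_NAME}/points/search
#   {"vector": <query embedding>, "limit": 5,
#    "filter": {"must": [{"key": "user_id", "match": {"value": "yourname"}}]}}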

def main():
    parser = argparse.ArgumentParser(
        description="Daily conversation backup to Qdrant (Mem0-style)"
    )
    parser.add_argument(
        "date",
        nargs="?",
        help="Date to process (YYYY-MM-DD). Default: yesterday"
    )
    parser.add_argument(
        "--user-id",
        default=DEFAULT_USER_ID,
        help=f"User ID (default: {DEFAULT_USER_ID})"
    )

    args = parser.parse_args()

    if args.date:
        date_str = args.date
    else:
        yesterday = datetime.now() - timedelta(days=1)
        date_str = yesterday.strftime("%Y-%m-%d")

    user_id = args.user_id

    print(f"📅 Processing daily log for {date_str} (user: {user_id})...")

    conversations = parse_daily_log(date_str)

    if not conversations:
        print(f"⚠️ No conversations found for {date_str}")
        sys.exit(0)

    print(f"📝 Found {len(conversations)} conversation turns")

    stored = 0
    skipped = 0
    failed = 0

    for conv in conversations:
        conversation_id = str(uuid.uuid4())

        # store_conversation_turn() checks this too, but checking here lets the
        # skip be reported before any embedding work is done
        content_hash = get_content_hash(conv['user'], conv['ai'])
        if is_duplicate(user_id, content_hash):
            skipped += 1
            print(f"  ⏭️ Turn {conv['turn_number']} skipped (duplicate)")
            continue

        success = store_conversation_turn(
            user_id=user_id,
            user_message=conv['user'],
            ai_response=conv['ai'],
            conversation_id=conversation_id,
            turn_number=conv['turn_number'],
            date_str=date_str
        )

        if success:
            stored += 1
            print(f"  ✅ Turn {conv['turn_number']} stored")
        else:
            failed += 1
            print(f"  ❌ Turn {conv['turn_number']} failed")

    print(f"\n{'='*50}")
    print(f"Daily backup complete for {date_str} (user: {user_id}):")
    print(f"  Stored:  {stored} turns ({stored * 3} embeddings)")
    print(f"  Skipped: {skipped} turns (duplicates)")
    print(f"  Failed:  {failed} turns")

    if stored > 0:
        print(f"\n✅ Daily backup: {stored} conversations stored to Qdrant")

    sys.exit(0 if failed == 0 else 1)


if __name__ == "__main__":
    main()
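
# Example invocations (dates and user ids are illustrative):
#   python3 daily_conversation_backup.py                  # yesterday's log
#   python3 daily_conversation_backup.py 2025-01-15       # a specific day
#   python3 daily_conversation_backup.py --user-id alice  # another user's memories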