Initial commit: Jarvis Memory system

2026-02-23 12:13:04 -06:00
commit e8854cd959
72 changed files with 14801 additions and 0 deletions
--- a/skills/qdrant-memory/scripts/store_conversation.py
+++ b/skills/qdrant-memory/scripts/store_conversation.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python3
+"""
+Conversation Memory Capture - Store conversational turns to Qdrant
+
+This script stores the full conversational context (user messages + AI responses)
+as atomic facts in Qdrant, not just summaries written to daily logs.
+
+Usage:
+    store_conversation.py "User message" "AI response" --date 2026-02-15 --tags "workflow"
+    store_conversation.py --file conversation.json  # Batch mode
+
+Features:
+    - Stores both user queries and AI responses
+    - Generates embeddings for semantic search
+    - Links related turns with conversation IDs
+    - Optionally extracts facts from responses (--extract-facts)
+"""
+
+import argparse
+import json
+import os
+import sys
+import urllib.request
+import urllib.error
+import uuid
+from datetime import datetime
+from typing import List, Optional, Dict, Any
+
+# Service endpoints and the target collection. Each value can be overridden
+# with an environment variable (QDRANT_URL, QDRANT_COLLECTION, OLLAMA_URL);
+# when a variable is unset the original hard-coded value is used.
+# Trailing slashes are stripped from the URLs because request paths are
+# appended as "/...". OLLAMA_URL must include the "/v1" prefix, since
+# "/embeddings" is appended directly to it.
+QDRANT_URL = os.environ.get("QDRANT_URL", "http://10.0.0.40:6333").rstrip("/")
+COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION", "kimi_memories")
+OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/v1").rstrip("/")
+
+
+def get_embedding(text: str) -> Optional[List[float]]:
+    """Return the embedding vector for ``text``, or ``None`` on failure.
+
+    The first 8192 characters of ``text`` are posted as JSON to
+    ``{OLLAMA_URL}/embeddings`` with the ``snowflake-arctic-embed2`` model,
+    and the vector is read from ``data[0]["embedding"]`` of the JSON reply.
+    Any exception raised while sending the request or reading the reply is
+    reported on stderr and converted into a ``None`` return value.
+    """
+    body = {"model": "snowflake-arctic-embed2", "input": text[:8192]}
+    request = urllib.request.Request(
+        f"{OLLAMA_URL}/embeddings",
+        data=json.dumps(body).encode(),
+        headers={"Content-Type": "application/json"},
+    )
+
+    try:
+        with urllib.request.urlopen(request, timeout=30) as response:
+            raw_reply = response.read().decode()
+        parsed = json.loads(raw_reply)
+        return parsed["data"][0]["embedding"]
+    except Exception as e:
+        # Best-effort: callers treat None as "could not embed" and move on.
+        print(f"Error generating embedding: {e}", file=sys.stderr)
+        return None
+
+
+def extract_tags(text: str, date_str: str) -> List[str]:
+    """Build the tag list for a piece of text.
+
+    The result always starts with the base tags ``"conversation-turn"``,
+    ``"atomic-fact"`` and ``date_str``, followed by one tag for every keyword
+    that occurs in ``text`` as a case-insensitive substring.
+
+    Args:
+        text: Text to scan for keywords.
+        date_str: Date string added verbatim as a tag.
+
+    Returns:
+        De-duplicated tags in a stable order: base tags first, then keyword
+        tags in the order the keywords are declared below.
+    """
+    # keyword (matched as a substring of the lower-cased text) -> tag
+    keyword_to_tag = {
+        "youtube": "youtube",
+        "video": "video",
+        "workflow": "workflow",
+        "process": "process",
+        "qdrant": "qdrant",
+        "memory": "memory",
+        "fact": "facts",
+        "extract": "extraction",
+        "config": "configuration",
+        "setting": "settings",
+        "rule": "rules",
+        "decision": "decisions",
+        "preference": "preferences",
+        "hardware": "hardware",
+        "security": "security",
+        "research": "research",
+        "step": "steps",
+        "grok": "grok",
+        "thumbnail": "thumbnail",
+        "title": "title",
+        "description": "description",
+        "seo": "seo",
+        "tags": "tags",
+    }
+
+    text_lower = text.lower()
+    tags = ["conversation-turn", "atomic-fact", date_str]
+    tags.extend(
+        tag for keyword, tag in keyword_to_tag.items() if keyword in text_lower
+    )
+
+    # dict.fromkeys removes duplicates while keeping first-seen order;
+    # list(set(...)) produced an order that varied between runs.
+    return list(dict.fromkeys(tags))
+
+
+def store_turn(
+    speaker: str,
+    message: str,
+    date_str: str,
+    tags: Optional[List[str]] = None,
+    conversation_id: Optional[str] = None,
+    turn_number: Optional[int] = None,
+    importance: str = "medium"
+) -> Optional[str]:
+    """Embed one conversational turn and upsert it into Qdrant.
+
+    Args:
+        speaker: Name shown in the stored text; ``"Rob"`` is recorded with
+            ``source_type`` ``"user"``, any other speaker as ``"assistant"``.
+        message: The turn's text; it is embedded and stored.
+        date_str: Date stored in the payload (and used for auto-tagging).
+        tags: Tags to store; when ``None`` they are derived from ``message``
+            with ``extract_tags``.
+        conversation_id: ID linking related turns; a fresh UUID is generated
+            when it is missing or empty.
+        turn_number: Position of the turn in the conversation; defaults to 0.
+        importance: Importance label stored in the payload.
+
+    Returns:
+        The new point's UUID string, or ``None`` when the embedding could not
+        be generated or the upsert failed (the reason is printed to stderr).
+    """
+    embedding = get_embedding(message)
+    if embedding is None:
+        return None
+
+    point_id = str(uuid.uuid4())
+
+    if tags is None:
+        tags = extract_tags(message, date_str)
+
+    # One timestamp so created_at and last_accessed are identical on creation.
+    now_iso = datetime.now().isoformat()
+
+    payload = {
+        "text": f"[{speaker}]: {message}",
+        "date": date_str,
+        "tags": tags,
+        "importance": importance,
+        "source": "conversation",
+        "source_type": "user" if speaker == "Rob" else "assistant",
+        "category": "Conversation",
+        "confidence": "high",
+        "verified": True,
+        "created_at": now_iso,
+        "access_count": 0,
+        "last_accessed": now_iso,
+        "conversation_id": conversation_id or str(uuid.uuid4()),
+        "turn_number": turn_number if turn_number is not None else 0
+    }
+
+    upsert_data = {
+        "points": [{
+            "id": point_id,
+            "vector": embedding,
+            "payload": payload
+        }]
+    }
+
+    req = urllib.request.Request(
+        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
+        data=json.dumps(upsert_data).encode(),
+        headers={"Content-Type": "application/json"},
+        method="PUT"
+    )
+
+    try:
+        with urllib.request.urlopen(req, timeout=10) as response:
+            result = json.loads(response.read().decode())
+        if result.get("status") == "ok":
+            return point_id
+        # Previously a non-"ok" reply returned None with no diagnostic.
+        print(
+            f"Error storing turn: unexpected Qdrant response: {result}",
+            file=sys.stderr
+        )
+    except Exception as e:
+        print(f"Error storing turn: {e}", file=sys.stderr)
+
+    return None
+
+
+def store_conversation_pair(
+    user_message: str,
+    ai_response: str,
+    date_str: str,
+    tags: List[str] = None,
+    importance: str = "medium"
+) -> tuple:
+    """Store a user message and the AI reply as turns 1 and 2 of one conversation.
+
+    Both turns share a freshly generated conversation ID, the same date,
+    tags and importance. The user turn is stored with speaker "Rob" and the
+    reply with speaker "Kimi". The reply is stored even if the user turn
+    failed.
+
+    Returns:
+        ``(user_id, ai_id)``; each element is the value returned by
+        ``store_turn`` for that turn (``None`` when it could not be stored).
+    """
+    # Keyword arguments common to both turns.
+    shared = {
+        "date_str": date_str,
+        "tags": tags,
+        "conversation_id": str(uuid.uuid4()),
+        "importance": importance,
+    }
+
+    user_id = store_turn(
+        speaker="Rob", message=user_message, turn_number=1, **shared
+    )
+    ai_id = store_turn(
+        speaker="Kimi", message=ai_response, turn_number=2, **shared
+    )
+
+    return user_id, ai_id
+
+
+def extract_facts_from_text(text: str, date_str: str) -> List[Dict[str, Any]]:
+    """Turn a text block into Qdrant points, one per sentence.
+
+    The text is split on newlines and after every ". ". Pieces shorter than
+    10 characters (after stripping) and pieces whose embedding cannot be
+    generated are skipped. Nothing is uploaded here; the caller receives the
+    points and decides what to do with them.
+
+    Returns:
+        A list of point dicts with ``id``, ``vector`` and ``payload`` keys.
+        The payload text is capped at 500 characters and the importance is
+        "high" when the sentence contains "**", otherwise "medium".
+    """
+    # Put each sentence on its own line, then treat every line as a candidate.
+    pieces = text.replace('. ', '.\n').split('\n')
+
+    points = []
+    for piece in pieces:
+        sentence = piece.strip()
+        # Covers empty pieces too, since their length is 0.
+        if len(sentence) < 10:
+            continue
+
+        vector = get_embedding(sentence)
+        if vector is None:
+            continue
+
+        fact_id = str(uuid.uuid4())
+        importance = "high" if "**" in sentence else "medium"
+
+        payload = {
+            "text": sentence[:500],
+            "date": date_str,
+            "tags": extract_tags(sentence, date_str),
+            "importance": importance,
+            "source": "fact-extraction",
+            "source_type": "inferred",
+            "category": "Extracted Fact",
+            "confidence": "medium",
+            "verified": False,
+            "created_at": datetime.now().isoformat(),
+            "access_count": 0,
+            "last_accessed": datetime.now().isoformat(),
+        }
+        points.append({"id": fact_id, "vector": vector, "payload": payload})
+
+    return points
+
+
+def _store_extracted_facts(response_text: str, date_str: str) -> int:
+    """Extract facts from ``response_text`` and upsert them in one request.
+
+    Returns the number of facts stored. Returns 0 when nothing was extracted
+    or when the upload failed; in the failure case a warning is printed to
+    stderr.
+    """
+    facts = extract_facts_from_text(response_text, date_str)
+    if not facts:
+        return 0
+
+    req = urllib.request.Request(
+        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
+        data=json.dumps({"points": facts}).encode(),
+        headers={"Content-Type": "application/json"},
+        method="PUT"
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=30) as response:
+            result = json.loads(response.read().decode())
+        # Only report success when Qdrant confirms the upsert.
+        if result.get("status") == "ok":
+            return len(facts)
+        print(
+            f"   Warning: Could not store extracted facts: unexpected response {result}",
+            file=sys.stderr
+        )
+    except Exception as e:
+        print(f"   Warning: Could not store extracted facts: {e}", file=sys.stderr)
+    return 0
+
+
+def main():
+    """Command-line entry point.
+
+    Two modes are supported:
+
+    * ``--file PATH``: batch mode. PATH must hold a JSON array of objects
+      with ``"user"`` and ``"ai"`` keys and an optional ``"tags"`` list that
+      is used when ``--tags`` is not given.
+    * two positional arguments: store a single user/AI pair.
+
+    With ``--extract-facts``, facts are also extracted from every AI response
+    whose pair was stored successfully, in both modes.
+
+    Exits with status 1 when the input is unusable, when any pair could not
+    be stored, or when no mode was selected.
+    """
+    parser = argparse.ArgumentParser(description="Store conversational turns to Qdrant")
+    parser.add_argument("user_message", nargs="?", help="User's message/query")
+    parser.add_argument("ai_response", nargs="?", help="AI's response")
+    parser.add_argument("--date", default=datetime.now().strftime("%Y-%m-%d"), help="Date (YYYY-MM-DD)")
+    parser.add_argument("--tags", help="Comma-separated tags")
+    parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])
+    parser.add_argument("--file", help="JSON file with conversation array")
+    parser.add_argument("--extract-facts", action="store_true", help="Also extract atomic facts from response")
+
+    args = parser.parse_args()
+
+    # Accept "a, b" as well as "a,b"; an all-blank value counts as no tags.
+    tags = None
+    if args.tags:
+        tags = [t.strip() for t in args.tags.split(",") if t.strip()] or None
+
+    if args.file:
+        # Batch mode from JSON file
+        try:
+            with open(args.file, 'r', encoding='utf-8') as f:
+                conversations = json.load(f)
+        except (OSError, ValueError) as e:
+            # ValueError covers both invalid JSON and undecodable bytes.
+            print(f"❌ Could not read {args.file}: {e}", file=sys.stderr)
+            sys.exit(1)
+
+        if not isinstance(conversations, list):
+            print(f"❌ {args.file} must contain a JSON array of conversations", file=sys.stderr)
+            sys.exit(1)
+
+        total = 0
+        failed = 0
+        for index, conv in enumerate(conversations):
+            if not isinstance(conv, dict) or "user" not in conv or "ai" not in conv:
+                print(
+                    f"Skipping entry {index}: expected an object with 'user' and 'ai' keys",
+                    file=sys.stderr
+                )
+                failed += 1
+                continue
+
+            user_id, ai_id = store_conversation_pair(
+                conv["user"],
+                conv["ai"],
+                args.date,
+                tags or conv.get("tags"),
+                args.importance
+            )
+            if user_id and ai_id:
+                total += 2
+                if args.extract_facts:
+                    _store_extracted_facts(conv["ai"], args.date)
+            else:
+                failed += 1
+
+        print(f"✅ Stored {total} conversation turns")
+        if failed:
+            # Mirror single-pair mode: a failed store is a non-zero exit.
+            print(f"❌ {failed} conversation pair(s) could not be stored", file=sys.stderr)
+            sys.exit(1)
+
+    elif args.user_message and args.ai_response:
+        # Single pair mode
+        user_id, ai_id = store_conversation_pair(
+            args.user_message,
+            args.ai_response,
+            args.date,
+            tags,
+            args.importance
+        )
+
+        if not (user_id and ai_id):
+            print("❌ Failed to store conversation")
+            sys.exit(1)
+
+        print("✅ Stored conversation pair")
+        print(f"   User turn: {user_id[:8]}...")
+        print(f"   AI turn: {ai_id[:8]}...")
+
+        if args.extract_facts:
+            stored = _store_extracted_facts(args.ai_response, args.date)
+            if stored:
+                print(f"   Extracted {stored} additional facts")
+    else:
+        parser.print_help()
+        sys.exit(1)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()