skills/qdrant-memory/scripts/store_conversation.py

#!/usr/bin/env python3
"""
Conversation Memory Capture - Store conversational turns to Qdrant

This script stores the full conversational context (user messages + AI responses)
as atomic facts in Qdrant, not just summaries written to daily logs.

Usage:
    store_conversation.py "User message" "AI response" --date 2026-02-15 --tags "workflow"
    store_conversation.py --file conversation.json  # Batch mode

Features:
    - Stores both user queries and AI responses
    - Generates embeddings for semantic search
    - Links related turns with conversation IDs
    - Optionally extracts atomic facts from the AI response (--extract-facts)
"""

import argparse
import json
import os
import sys
import urllib.request
import urllib.error
import uuid
from datetime import datetime
from typing import List, Optional, Dict, Any

# Service endpoints and target collection.
# Each value can be overridden with the environment variable named in the
# lookup (QDRANT_URL, QDRANT_COLLECTION, OLLAMA_URL); unset or empty variables
# fall back to the original hard-coded defaults. Trailing slashes are removed
# from the URLs because request paths are appended with a leading "/".
QDRANT_URL = (os.environ.get("QDRANT_URL") or "http://10.0.0.40:6333").rstrip("/")
COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION") or "kimi_memories"
OLLAMA_URL = (os.environ.get("OLLAMA_URL") or "http://localhost:11434/v1").rstrip("/")


def get_embedding(text: str) -> Optional[List[float]]:
    """Fetch an embedding vector for ``text`` using snowflake-arctic-embed2.

    Only the first 8192 characters of ``text`` are sent. The request body is
    JSON and goes to ``{OLLAMA_URL}/embeddings`` with a 30 second timeout; the
    vector is read from ``data[0]["embedding"]`` of the JSON reply.

    Returns:
        The embedding, or ``None`` when the request or the parsing of the
        reply raises (the error is printed to stderr).
    """
    request_body = {
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=json.dumps(request_body).encode(),
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as reply:
            parsed = json.loads(reply.read().decode())
            # Kept inside the try so a reply with an unexpected shape is
            # reported like any other failure instead of raising.
            first_item = parsed["data"][0]
            return first_item["embedding"]
    except Exception as e:
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None


# Keyword -> tag. A keyword counts as present when it occurs anywhere in the
# lowercased text (plain substring match, so "config" also matches
# "configuration").
_TAG_KEYWORDS = {
    "youtube": "youtube",
    "video": "video",
    "workflow": "workflow",
    "process": "process",
    "qdrant": "qdrant",
    "memory": "memory",
    "fact": "facts",
    "extract": "extraction",
    "config": "configuration",
    "setting": "settings",
    "rule": "rules",
    "decision": "decisions",
    "preference": "preferences",
    "hardware": "hardware",
    "security": "security",
    "research": "research",
    "step": "steps",
    "grok": "grok",
    "thumbnail": "thumbnail",
    "title": "title",
    "description": "description",
    "seo": "seo",
    "tags": "tags",
}


def extract_tags(text: str, date_str: str) -> List[str]:
    """Derive tags for ``text`` from a fixed keyword table.

    The result always starts with ``"conversation-turn"``, ``"atomic-fact"``
    and ``date_str``, followed by the tag of every keyword found in the
    lowercased text, in keyword-table order.

    Args:
        text: Text to scan for keywords (case-insensitive substring match).
        date_str: Date string added verbatim as a tag.

    Returns:
        The tags without duplicates, in a stable, deterministic order.
    """
    text_lower = text.lower()

    tags = ["conversation-turn", "atomic-fact", date_str]
    tags.extend(
        tag for keyword, tag in _TAG_KEYWORDS.items() if keyword in text_lower
    )

    # dict.fromkeys de-duplicates while keeping first-seen order; the previous
    # list(set(...)) produced an order that changed from run to run.
    return list(dict.fromkeys(tags))


def store_turn(
    speaker: str,
    message: str,
    date_str: str,
    tags: Optional[List[str]] = None,
    conversation_id: Optional[str] = None,
    turn_number: Optional[int] = None,
    importance: str = "medium"
) -> Optional[str]:
    """Embed one conversational turn and upsert it into Qdrant.

    Args:
        speaker: Name shown in the stored text. ``"Rob"`` is recorded with
            ``source_type`` ``"user"``; any other speaker as ``"assistant"``.
        message: The turn's text; it is embedded and stored as
            ``"[speaker]: message"``.
        date_str: Date stored in the payload (and used for auto-tagging).
        tags: Tags to store. When ``None`` they are derived from ``message``
            with ``extract_tags``.
        conversation_id: Identifier linking related turns. A fresh UUID is
            generated when omitted or empty.
        turn_number: Position of the turn in its conversation; stored as 0
            when omitted.
        importance: Stored as-is in the payload.

    Returns:
        The new point's UUID string when Qdrant answers with status ``"ok"``,
        otherwise ``None``. Every failure path prints a reason to stderr
        (embedding failures are reported by ``get_embedding``).
    """
    embedding = get_embedding(message)
    if embedding is None:
        return None

    if tags is None:
        tags = extract_tags(message, date_str)

    point_id = str(uuid.uuid4())
    # One timestamp for both fields so a new point's created_at and
    # last_accessed are exactly equal.
    timestamp = datetime.now().isoformat()

    payload = {
        "text": f"[{speaker}]: {message}",
        "date": date_str,
        "tags": tags,
        "importance": importance,
        "source": "conversation",
        "source_type": "user" if speaker == "Rob" else "assistant",
        "category": "Conversation",
        "confidence": "high",
        "verified": True,
        "created_at": timestamp,
        "access_count": 0,
        "last_accessed": timestamp,
        "conversation_id": conversation_id or str(uuid.uuid4()),
        "turn_number": turn_number or 0
    }

    upsert_data = {
        "points": [{
            "id": point_id,
            "vector": embedding,
            "payload": payload
        }]
    }

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps(upsert_data).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT"
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
        # Read inside the try so a reply that is not a JSON object is
        # reported below rather than raising.
        status = result.get("status")
    except Exception as e:
        print(f"Error storing turn: {e}", file=sys.stderr)
        return None

    if status == "ok":
        return point_id

    # Previously this path returned None without any diagnostic.
    print(f"Error storing turn: Qdrant returned status {status!r}", file=sys.stderr)
    return None


def store_conversation_pair(
    user_message: str,
    ai_response: str,
    date_str: str,
    tags: List[str] = None,
    importance: str = "medium"
) -> tuple:
    """Store a user message and the AI reply as turns 1 and 2 of one conversation.

    Both turns are written with ``store_turn`` and share a newly generated
    conversation ID, the same date, tags and importance. The user turn is
    stored under the speaker name "Rob", the AI turn under "Kimi". The AI
    turn is attempted even when storing the user turn failed.

    Returns:
        ``(user_id, ai_id)``; each element is the stored point ID or ``None``
        if that turn could not be stored.
    """
    # Everything the two turns have in common, including the ID linking them.
    link = {
        "date_str": date_str,
        "tags": tags,
        "conversation_id": str(uuid.uuid4()),
        "importance": importance,
    }

    user_point = store_turn(speaker="Rob", message=user_message, turn_number=1, **link)
    ai_point = store_turn(speaker="Kimi", message=ai_response, turn_number=2, **link)

    return user_point, ai_point


def extract_facts_from_text(text: str, date_str: str) -> List[Dict[str, Any]]:
    """Turn a block of text into Qdrant points, one per sentence-like fragment.

    The text is broken after every ". " and at every newline. Fragments
    shorter than 10 characters (after stripping) and fragments whose
    embedding could not be generated are skipped.

    Returns:
        A list of point dicts (``id``, ``vector``, ``payload``). The payload
        text is limited to 500 characters, although the embedding is computed
        from the whole fragment. Nothing is uploaded here.
    """
    fragments = (piece.strip() for piece in text.replace('. ', '.\n').split('\n'))

    points: List[Dict[str, Any]] = []
    for fragment in fragments:
        # Also drops empty fragments, whose length is 0.
        if len(fragment) < 10:
            continue

        vector = get_embedding(fragment)
        if vector is None:
            continue

        # Markdown bold markers in the fragment are treated as a sign of importance.
        importance = "high" if "**" in fragment else "medium"

        payload = {
            "text": fragment[:500],
            "date": date_str,
            "tags": extract_tags(fragment, date_str),
            "importance": importance,
            "source": "fact-extraction",
            "source_type": "inferred",
            "category": "Extracted Fact",
            "confidence": "medium",
            "verified": False,
            "created_at": datetime.now().isoformat(),
            "access_count": 0,
            "last_accessed": datetime.now().isoformat()
        }
        points.append({
            "id": str(uuid.uuid4()),
            "vector": vector,
            "payload": payload
        })

    return points


def _parse_tags(raw: Optional[str]) -> Optional[List[str]]:
    """Split a comma-separated --tags value, trimming whitespace and dropping blanks."""
    if not raw:
        return None
    cleaned = [tag.strip() for tag in raw.split(",") if tag.strip()]
    # Nothing usable (e.g. ",") behaves like no --tags at all.
    return cleaned or None


def _store_extracted_facts(ai_response: str, date_str: str) -> None:
    """Extract facts from an AI response and upload them in one upsert; failures only warn."""
    facts = extract_facts_from_text(ai_response, date_str)
    if not facts:
        return

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps({"points": facts}).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT"
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
        status = result.get("status")
    except Exception as e:
        print(f"   Warning: Could not store extracted facts: {e}")
        return

    # Only claim success when Qdrant confirms it.
    if status == "ok":
        print(f"   Extracted {len(facts)} additional facts")
    else:
        print(f"   Warning: Could not store extracted facts: Qdrant returned status {status!r}")


def main():
    """Command-line entry point.

    Modes:
        * ``--file PATH``: batch mode. PATH must hold a JSON array of objects
          with ``"user"`` and ``"ai"`` keys and an optional ``"tags"`` list.
          Tags given with ``--tags`` take precedence over per-entry tags.
        * two positional arguments (user message, AI response): single pair.
        * anything else: print the help text and exit with status 1.

    ``--extract-facts`` is honored in both modes and only runs for pairs that
    were stored successfully.

    Exit status is 1 when the input file cannot be read or parsed, when a
    single pair fails to store, or when at least one batch entry is malformed
    or fails to store. Batch mode still processes every entry before exiting.
    """
    parser = argparse.ArgumentParser(description="Store conversational turns to Qdrant")
    parser.add_argument("user_message", nargs="?", help="User's message/query")
    parser.add_argument("ai_response", nargs="?", help="AI's response")
    parser.add_argument("--date", default=datetime.now().strftime("%Y-%m-%d"), help="Date (YYYY-MM-DD)")
    parser.add_argument("--tags", help="Comma-separated tags")
    parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])
    parser.add_argument("--file", help="JSON file with conversation array")
    parser.add_argument("--extract-facts", action="store_true", help="Also extract atomic facts from response")

    args = parser.parse_args()

    tags = _parse_tags(args.tags)

    if args.file:
        # Batch mode from JSON file
        try:
            with open(args.file, 'r', encoding="utf-8") as f:
                conversations = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"❌ Could not read {args.file}: {e}", file=sys.stderr)
            sys.exit(1)

        if not isinstance(conversations, list):
            print(f"❌ {args.file} must contain a JSON array of conversations", file=sys.stderr)
            sys.exit(1)

        total = 0
        failed = 0
        for index, conv in enumerate(conversations):
            if not isinstance(conv, dict) or "user" not in conv or "ai" not in conv:
                print(
                    f"Skipping entry {index}: expected an object with 'user' and 'ai' keys",
                    file=sys.stderr
                )
                failed += 1
                continue

            user_id, ai_id = store_conversation_pair(
                conv["user"],
                conv["ai"],
                args.date,
                tags or conv.get("tags"),
                args.importance
            )
            if user_id and ai_id:
                total += 2
                if args.extract_facts:
                    _store_extracted_facts(conv["ai"], args.date)
            else:
                failed += 1

        print(f"✅ Stored {total} conversation turns")
        if failed:
            # Make partial failures visible to callers, as single-pair mode does.
            print(f"❌ {failed} conversation pair(s) could not be stored", file=sys.stderr)
            sys.exit(1)

    elif args.user_message and args.ai_response:
        # Single pair mode
        user_id, ai_id = store_conversation_pair(
            args.user_message,
            args.ai_response,
            args.date,
            tags,
            args.importance
        )

        if not (user_id and ai_id):
            print("❌ Failed to store conversation")
            sys.exit(1)

        print("✅ Stored conversation pair")
        print(f"   User turn: {user_id[:8]}...")
        print(f"   AI turn: {ai_id[:8]}...")

        if args.extract_facts:
            _store_extracted_facts(args.ai_response, args.date)
    else:
        parser.print_help()
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Initial commit: Jarvis Memory system 2026-02-23 12:13:04 -06:00			`#!/usr/bin/env python3`
			`"""`
			`Conversation Memory Capture - Store conversational turns to Qdrant`

			`This script stores the full conversational context (user messages + AI responses)`
			`as atomic facts in Qdrant, not just summaries written to daily logs.`

			`Usage:`
			`store_conversation.py "User message" "AI response" --date 2026-02-15 --tags "workflow"`
			`store_conversation.py --file conversation.json # Batch mode`

			`Features:`
			`- Stores both user queries and AI responses`
			`- Generates embeddings for semantic search`
			`- Links related turns with conversation IDs`
			`- Extracts facts from responses automatically`
			`"""`

			`import argparse`
			`import json`
			`import os`
			`import sys`
			`import urllib.request`
			`import urllib.error`
			`import uuid`
			`from datetime import datetime`
			`from typing import List, Optional, Dict, Any`

			`QDRANT_URL = "http://10.0.0.40:6333"`
			`COLLECTION_NAME = "kimi_memories"`
			`OLLAMA_URL = "http://localhost:11434/v1"`


			`def get_embedding(text: str) -> Optional[List[float]]:`
			`"""Generate embedding using snowflake-arctic-embed2"""`
			`data = json.dumps({`
			`"model": "snowflake-arctic-embed2",`
			`"input": text[:8192]`
			`}).encode()`

			`req = urllib.request.Request(`
			`f"{OLLAMA_URL}/embeddings",`
			`data=data,`
			`headers={"Content-Type": "application/json"}`
			`)`

			`try:`
			`with urllib.request.urlopen(req, timeout=30) as response:`
			`result = json.loads(response.read().decode())`
			`return result["data"][0]["embedding"]`
			`except Exception as e:`
			`print(f"Error generating embedding: {e}", file=sys.stderr)`
			`return None`


			`def extract_tags(text: str, date_str: str) -> List[str]:`
			`"""Extract relevant tags from text"""`
			`tags = ["conversation-turn", "atomic-fact", date_str]`

			`text_lower = text.lower()`
			`tag_mappings = {`
			`"youtube": "youtube",`
			`"video": "video",`
			`"workflow": "workflow",`
			`"process": "process",`
			`"qdrant": "qdrant",`
			`"memory": "memory",`
			`"fact": "facts",`
			`"extract": "extraction",`
			`"config": "configuration",`
			`"setting": "settings",`
			`"rule": "rules",`
			`"decision": "decisions",`
			`"preference": "preferences",`
			`"hardware": "hardware",`
			`"security": "security",`
			`"research": "research",`
			`"step": "steps",`
			`"grok": "grok",`
			`"thumbnail": "thumbnail",`
			`"title": "title",`
			`"description": "description",`
			`"seo": "seo",`
			`"tags": "tags",`
			`}`

			`for keyword, tag in tag_mappings.items():`
			`if keyword in text_lower:`
			`tags.append(tag)`

			`return list(set(tags))`


			`def store_turn(`
			`speaker: str,`
			`message: str,`
			`date_str: str,`
			`tags: List[str] = None,`
			`conversation_id: str = None,`
			`turn_number: int = None,`
			`importance: str = "medium"`
			`) -> Optional[str]:`
			`"""Store a single conversational turn"""`

			`embedding = get_embedding(message)`
			`if embedding is None:`
			`return None`

			`point_id = str(uuid.uuid4())`

			`if tags is None:`
			`tags = extract_tags(message, date_str)`

			`payload = {`
			`"text": f"[{speaker}]: {message}",`
			`"date": date_str,`
			`"tags": tags,`
			`"importance": importance,`
			`"source": "conversation",`
			`"source_type": "user" if speaker == "Rob" else "assistant",`
			`"category": "Conversation",`
			`"confidence": "high",`
			`"verified": True,`
			`"created_at": datetime.now().isoformat(),`
			`"access_count": 0,`
			`"last_accessed": datetime.now().isoformat(),`
			`"conversation_id": conversation_id or str(uuid.uuid4()),`
			`"turn_number": turn_number or 0`
			`}`

			`upsert_data = {`
			`"points": [{`
			`"id": point_id,`
			`"vector": embedding,`
			`"payload": payload`
			`}]`
			`}`

			`req = urllib.request.Request(`
			`f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",`
			`data=json.dumps(upsert_data).encode(),`
			`headers={"Content-Type": "application/json"},`
			`method="PUT"`
			`)`

			`try:`
			`with urllib.request.urlopen(req, timeout=10) as response:`
			`result = json.loads(response.read().decode())`
			`if result.get("status") == "ok":`
			`return point_id`
			`except Exception as e:`
			`print(f"Error storing turn: {e}", file=sys.stderr)`

			`return None`


			`def store_conversation_pair(`
			`user_message: str,`
			`ai_response: str,`
			`date_str: str,`
			`tags: List[str] = None,`
			`importance: str = "medium"`
			`) -> tuple:`
			`"""Store both user query and AI response as linked turns"""`

			`conversation_id = str(uuid.uuid4())`

			`user_id = store_turn(`
			`speaker="Rob",`
			`message=user_message,`
			`date_str=date_str,`
			`tags=tags,`
			`conversation_id=conversation_id,`
			`turn_number=1,`
			`importance=importance`
			`)`

			`ai_id = store_turn(`
			`speaker="Kimi",`
			`message=ai_response,`
			`date_str=date_str,`
			`tags=tags,`
			`conversation_id=conversation_id,`
			`turn_number=2,`
			`importance=importance`
			`)`

			`return user_id, ai_id`


			`def extract_facts_from_text(text: str, date_str: str) -> List[Dict[str, Any]]:`
			`"""Extract atomic facts from a text block"""`
			`facts = []`

			`# Split into sentences`
			`sentences = [s.strip() for s in text.replace('. ', '.\n').split('\n') if s.strip()]`

			`for sentence in sentences:`
			`if len(sentence) < 10:`
			`continue`

			`embedding = get_embedding(sentence)`
			`if embedding is None:`
			`continue`

			`point_id = str(uuid.uuid4())`

			`facts.append({`
			`"id": point_id,`
			`"vector": embedding,`
			`"payload": {`
			`"text": sentence[:500],`
			`"date": date_str,`
			`"tags": extract_tags(sentence, date_str),`
			`"importance": "high" if "**" in sentence else "medium",`
			`"source": "fact-extraction",`
			`"source_type": "inferred",`
			`"category": "Extracted Fact",`
			`"confidence": "medium",`
			`"verified": False,`
			`"created_at": datetime.now().isoformat(),`
			`"access_count": 0,`
			`"last_accessed": datetime.now().isoformat()`
			`}`
			`})`

			`return facts`


			`def main():`
			`parser = argparse.ArgumentParser(description="Store conversational turns to Qdrant")`
			`parser.add_argument("user_message", nargs="?", help="User's message/query")`
			`parser.add_argument("ai_response", nargs="?", help="AI's response")`
			`parser.add_argument("--date", default=datetime.now().strftime("%Y-%m-%d"), help="Date (YYYY-MM-DD)")`
			`parser.add_argument("--tags", help="Comma-separated tags")`
			`parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])`
			`parser.add_argument("--file", help="JSON file with conversation array")`
			`parser.add_argument("--extract-facts", action="store_true", help="Also extract atomic facts from response")`

			`args = parser.parse_args()`

			`tags = args.tags.split(",") if args.tags else None`

			`if args.file:`
			`# Batch mode from JSON file`
			`with open(args.file, 'r') as f:`
			`conversations = json.load(f)`

			`total = 0`
			`for conv in conversations:`
			`user_id, ai_id = store_conversation_pair(`
			`conv["user"],`
			`conv["ai"],`
			`args.date,`
			`tags or conv.get("tags"),`
			`args.importance`
			`)`
			`if user_id and ai_id:`
			`total += 2`

			`print(f"✅ Stored {total} conversation turns")`

			`elif args.user_message and args.ai_response:`
			`# Single pair mode`
			`user_id, ai_id = store_conversation_pair(`
			`args.user_message,`
			`args.ai_response,`
			`args.date,`
			`tags,`
			`args.importance`
			`)`

			`if user_id and ai_id:`
			`print(f"✅ Stored conversation pair")`
			`print(f" User turn: {user_id[:8]}...")`
			`print(f" AI turn: {ai_id[:8]}...")`

			`if args.extract_facts:`
			`facts = extract_facts_from_text(args.ai_response, args.date)`
			`if facts:`
			`# Upload facts`
			`upsert_data = {"points": facts}`
			`req = urllib.request.Request(`
			`f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",`
			`data=json.dumps(upsert_data).encode(),`
			`headers={"Content-Type": "application/json"},`
			`method="PUT"`
			`)`
			`try:`
			`with urllib.request.urlopen(req, timeout=30) as response:`
			`print(f" Extracted {len(facts)} additional facts")`
			`except Exception as e:`
			`print(f" Warning: Could not store extracted facts: {e}")`
			`else:`
			`print("❌ Failed to store conversation")`
			`sys.exit(1)`
			`else:`
			`parser.print_help()`
			`sys.exit(1)`


			`if __name__ == "__main__":`
			`main()`