# jarvis-memory/skills/qdrant-memory/scripts/store_conversation.py

#!/usr/bin/env python3
"""
Conversation Memory Capture - Store conversational turns to Qdrant

This script stores the full conversational context (user messages + AI responses)
as atomic facts in Qdrant, not just summaries written to daily logs.

Usage:
    store_conversation.py "User message" "AI response" --date 2026-02-15 --tags "workflow"
    store_conversation.py --file conversation.json  # Batch mode

Features:
    - Stores both user queries and AI responses
    - Generates embeddings for semantic search
    - Links related turns with conversation IDs
    - Optionally extracts atomic facts from the AI response (--extract-facts)
"""

import argparse
import json
import os
import sys
import urllib.request
import urllib.error
import uuid
from datetime import datetime
from typing import List, Optional, Dict, Any

# Base URL of the Qdrant REST API; point upserts are sent to
# {QDRANT_URL}/collections/{COLLECTION_NAME}/points.
QDRANT_URL = "http://10.0.0.40:6333"
# Name of the Qdrant collection that receives every stored point.
COLLECTION_NAME = "kimi_memories"
# Base URL of the embeddings service; requests are POSTed to
# {OLLAMA_URL}/embeddings.
# NOTE(review): the name and the /v1 path suggest Ollama's OpenAI-compatible
# API — confirm against the deployment.
OLLAMA_URL = "http://localhost:11434/v1"


def get_embedding(text: str) -> Optional[List[float]]:
    """Return the embedding vector for *text*, or ``None`` on failure.

    The text is cut to its first 8192 characters and POSTed as JSON to the
    ``/embeddings`` endpoint under ``OLLAMA_URL`` with the
    ``snowflake-arctic-embed2`` model. The vector is read from
    ``data[0].embedding`` of the JSON reply.

    Any error raised while sending the request or decoding the reply is
    printed to stderr and turned into a ``None`` return value.
    """
    request_body = {"model": "snowflake-arctic-embed2", "input": text[:8192]}
    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=json.dumps(request_body).encode(),
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as reply:
            parsed = json.loads(reply.read().decode())
        return parsed["data"][0]["embedding"]
    except Exception as e:
        # Best-effort: callers treat None as "skip this item".
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None


def extract_tags(text: str, date_str: str) -> List[str]:
    """Derive the tag list for a piece of text.

    The result always starts with ``"conversation-turn"``, ``"atomic-fact"``
    and ``date_str``, followed by one tag for every keyword found in the
    lower-cased text, in the order the keywords are declared below.

    Args:
        text: Text to scan for keywords (matched case-insensitively).
        date_str: Date string that is added verbatim as a tag.

    Returns:
        The tags without duplicates, in a stable, deterministic order.

    Note:
        Keywords are matched as plain substrings, so ``"fact"`` also matches
        ``"factory"`` and ``"step"`` also matches ``"stepwise"``.
    """
    keyword_to_tag = {
        "youtube": "youtube",
        "video": "video",
        "workflow": "workflow",
        "process": "process",
        "qdrant": "qdrant",
        "memory": "memory",
        "fact": "facts",
        "extract": "extraction",
        "config": "configuration",
        "setting": "settings",
        "rule": "rules",
        "decision": "decisions",
        "preference": "preferences",
        "hardware": "hardware",
        "security": "security",
        "research": "research",
        "step": "steps",
        "grok": "grok",
        "thumbnail": "thumbnail",
        "title": "title",
        "description": "description",
        "seo": "seo",
        "tags": "tags",
    }

    text_lower = text.lower()
    tags = ["conversation-turn", "atomic-fact", date_str]
    tags.extend(
        tag for keyword, tag in keyword_to_tag.items() if keyword in text_lower
    )

    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # make the tag order vary between runs because of string-hash
    # randomization.
    return list(dict.fromkeys(tags))


def store_turn(
    speaker: str,
    message: str,
    date_str: str,
    tags: Optional[List[str]] = None,
    conversation_id: Optional[str] = None,
    turn_number: Optional[int] = None,
    importance: str = "medium"
) -> Optional[str]:
    """Embed one conversational turn and upsert it into Qdrant.

    Args:
        speaker: Name of whoever produced the message. ``"Rob"`` is recorded
            with ``source_type`` ``"user"``; any other name is recorded as
            ``"assistant"``.
        message: The text of the turn; it is embedded and stored as
            ``"[speaker]: message"``.
        date_str: Date stored in the payload (and used for tag extraction).
        tags: Tags to store. When ``None`` they are derived from the message
            with ``extract_tags``.
        conversation_id: Identifier linking related turns. A fresh UUID is
            generated when omitted.
        turn_number: Position of the turn in the conversation; stored as 0
            when omitted.
        importance: Importance label stored in the payload.

    Returns:
        The UUID of the stored point, or ``None`` when the embedding could not
        be generated or Qdrant did not confirm the upsert. Every failure is
        reported on stderr.
    """
    embedding = get_embedding(message)
    if embedding is None:
        return None

    if tags is None:
        tags = extract_tags(message, date_str)

    point_id = str(uuid.uuid4())
    # Take one timestamp so created_at and last_accessed agree exactly.
    timestamp = datetime.now().isoformat()

    payload = {
        "text": f"[{speaker}]: {message}",
        "date": date_str,
        "tags": tags,
        "importance": importance,
        "source": "conversation",
        "source_type": "user" if speaker == "Rob" else "assistant",
        "category": "Conversation",
        "confidence": "high",
        "verified": True,
        "created_at": timestamp,
        "access_count": 0,
        "last_accessed": timestamp,
        "conversation_id": conversation_id or str(uuid.uuid4()),
        "turn_number": turn_number or 0
    }

    upsert_data = {
        "points": [{
            "id": point_id,
            "vector": embedding,
            "payload": payload
        }]
    }

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps(upsert_data).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT"
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
        status = result.get("status")
    except Exception as e:
        print(f"Error storing turn: {e}", file=sys.stderr)
        return None

    if status != "ok":
        # Previously this case returned None without any diagnostic.
        print(f"Error storing turn: unexpected Qdrant response: {result}", file=sys.stderr)
        return None

    return point_id


def store_conversation_pair(
    user_message: str,
    ai_response: str,
    date_str: str,
    tags: List[str] = None,
    importance: str = "medium"
) -> tuple:
    """Store a user message and the AI reply as two linked turns.

    Both turns share one freshly generated conversation ID. The user message
    is stored first as turn 1 under the speaker "Rob", then the reply as
    turn 2 under the speaker "Kimi". The second turn is attempted even when
    the first one fails.

    Returns:
        ``(user_id, ai_id)`` — the point ID returned by ``store_turn`` for
        each turn, which is ``None`` for a turn that could not be stored.
    """
    shared_fields = {
        "date_str": date_str,
        "tags": tags,
        "conversation_id": str(uuid.uuid4()),
        "importance": importance,
    }
    ordered_turns = (("Rob", user_message), ("Kimi", ai_response))

    stored_ids = [
        store_turn(
            speaker=who,
            message=said,
            turn_number=position,
            **shared_fields
        )
        for position, (who, said) in enumerate(ordered_turns, start=1)
    ]
    return tuple(stored_ids)


def extract_facts_from_text(text: str, date_str: str) -> List[Dict[str, Any]]:
    """Build Qdrant point dicts for the sentence-like pieces of *text*.

    The text is broken at every ``". "`` and at every newline. Each piece is
    stripped; pieces shorter than 10 characters are ignored, as are pieces
    for which no embedding could be generated. The embedding is computed
    from the whole piece, while the stored ``text`` keeps only its first 500
    characters. A piece containing ``**`` is marked ``"high"`` importance,
    everything else ``"medium"``.

    Returns:
        A list of ``{"id", "vector", "payload"}`` dicts. Nothing is uploaded
        here; that is left to the caller.
    """
    points: List[Dict[str, Any]] = []

    # Turn every ". " into a line break, then treat each line as a candidate.
    for raw_piece in text.replace('. ', '.\n').split('\n'):
        candidate = raw_piece.strip()
        # Covers both blank pieces and fragments too short to be a fact.
        if len(candidate) < 10:
            continue

        vector = get_embedding(candidate)
        if vector is None:
            continue

        if "**" in candidate:
            importance = "high"
        else:
            importance = "medium"

        fact_payload = {
            "text": candidate[:500],
            "date": date_str,
            "tags": extract_tags(candidate, date_str),
            "importance": importance,
            "source": "fact-extraction",
            "source_type": "inferred",
            "category": "Extracted Fact",
            "confidence": "medium",
            "verified": False,
            "created_at": datetime.now().isoformat(),
            "access_count": 0,
            "last_accessed": datetime.now().isoformat()
        }
        points.append({
            "id": str(uuid.uuid4()),
            "vector": vector,
            "payload": fact_payload
        })

    return points


def _parse_tags(raw: Optional[str]) -> Optional[List[str]]:
    """Split a comma-separated --tags value, trimming blanks; None if nothing usable remains."""
    if not raw:
        return None
    cleaned = [piece.strip() for piece in raw.split(",") if piece.strip()]
    return cleaned or None


def _store_extracted_facts(ai_response: str, date_str: str) -> int:
    """Extract facts from one AI response, upsert them, and return how many were stored."""
    facts = extract_facts_from_text(ai_response, date_str)
    if not facts:
        return 0

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps({"points": facts}).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT"
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
        # Same success criterion store_turn uses for a single point.
        if result.get("status") == "ok":
            return len(facts)
        problem = f"unexpected Qdrant response: {result}"
    except Exception as e:
        problem = e

    # Fact extraction is best-effort: warn, but do not fail the run.
    print(f"   Warning: Could not store extracted facts: {problem}", file=sys.stderr)
    return 0


def main():
    """Command-line entry point.

    Modes:
        * ``--file PATH``: batch mode. PATH must hold a JSON array of objects
          with ``"user"`` and ``"ai"`` keys and an optional ``"tags"`` key,
          which is used only when ``--tags`` is not given.
        * Two positional arguments (user message, AI response): store a
          single pair.
        * Anything else prints the help text.

    With ``--extract-facts``, facts are also extracted from the AI response
    of every pair that was stored completely.

    Exit status is 1 when the help text is shown, when the batch file cannot
    be read or is not a JSON array, or when any pair could not be stored
    completely. Failure and warning messages go to stderr.
    """
    parser = argparse.ArgumentParser(description="Store conversational turns to Qdrant")
    parser.add_argument("user_message", nargs="?", help="User's message/query")
    parser.add_argument("ai_response", nargs="?", help="AI's response")
    parser.add_argument("--date", default=datetime.now().strftime("%Y-%m-%d"), help="Date (YYYY-MM-DD)")
    parser.add_argument("--tags", help="Comma-separated tags")
    parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])
    parser.add_argument("--file", help="JSON file with conversation array")
    parser.add_argument("--extract-facts", action="store_true", help="Also extract atomic facts from response")

    args = parser.parse_args()

    tags = _parse_tags(args.tags)

    if args.file:
        # Batch mode from JSON file
        try:
            with open(args.file, 'r', encoding="utf-8") as f:
                conversations = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            print(f"❌ Could not read {args.file}: {e}", file=sys.stderr)
            sys.exit(1)

        if not isinstance(conversations, list):
            print("❌ Batch file must contain a JSON array of conversations", file=sys.stderr)
            sys.exit(1)

        total = 0
        failed = 0
        facts_total = 0
        for index, conv in enumerate(conversations):
            if not isinstance(conv, dict) or "user" not in conv or "ai" not in conv:
                print(
                    f"Skipping entry {index}: expected an object with 'user' and 'ai' keys",
                    file=sys.stderr
                )
                failed += 1
                continue

            user_id, ai_id = store_conversation_pair(
                conv["user"],
                conv["ai"],
                args.date,
                tags or conv.get("tags"),
                args.importance
            )
            # Count every turn that actually reached Qdrant, even when its
            # partner failed, so the summary reflects what was stored.
            total += sum(1 for stored in (user_id, ai_id) if stored)

            if user_id and ai_id:
                if args.extract_facts:
                    facts_total += _store_extracted_facts(conv["ai"], args.date)
            else:
                failed += 1

        print(f"✅ Stored {total} conversation turns")
        if args.extract_facts:
            print(f"   Extracted {facts_total} additional facts")

        if failed:
            print(
                f"❌ {failed} of {len(conversations)} conversations were not fully stored",
                file=sys.stderr
            )
            sys.exit(1)

    elif args.user_message and args.ai_response:
        # Single pair mode
        user_id, ai_id = store_conversation_pair(
            args.user_message,
            args.ai_response,
            args.date,
            tags,
            args.importance
        )

        if not (user_id and ai_id):
            print("❌ Failed to store conversation", file=sys.stderr)
            sys.exit(1)

        print("✅ Stored conversation pair")
        print(f"   User turn: {user_id[:8]}...")
        print(f"   AI turn: {ai_id[:8]}...")

        if args.extract_facts:
            stored_facts = _store_extracted_facts(args.ai_response, args.date)
            if stored_facts:
                print(f"   Extracted {stored_facts} additional facts")
    else:
        parser.print_help()
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()