Files
jarvis-memory/skills/qdrant-memory/scripts/store_conversation.py

303 lines
9.1 KiB
Python
Raw Normal View History

2026-02-23 12:13:04 -06:00
#!/usr/bin/env python3
"""
Conversation Memory Capture - Store conversational turns to Qdrant
This script stores the full conversational context (user messages + AI responses)
as atomic facts in Qdrant, not just summaries written to daily logs.
Usage:
store_conversation.py "User message" "AI response" --date 2026-02-15 --tags "workflow"
store_conversation.py --file conversation.json # Batch mode
Features:
- Stores both user queries and AI responses
- Generates embeddings for semantic search
- Links related turns with conversation IDs
- Optionally extracts atomic facts from the AI response (--extract-facts, single-pair mode only)
"""
import argparse
import json
import os
import sys
import urllib.request
import urllib.error
import uuid
from datetime import datetime
from typing import List, Optional, Dict, Any
# Base URL of the Qdrant REST API; point upserts go to
# {QDRANT_URL}/collections/{COLLECTION_NAME}/points.
QDRANT_URL = "http://10.0.0.40:6333"
# Name of the Qdrant collection every point in this script is written to.
COLLECTION_NAME = "kimi_memories"
# Base URL of the embeddings service; "/embeddings" is appended per request.
OLLAMA_URL = "http://localhost:11434/v1"
def get_embedding(text: str) -> Optional[List[float]]:
    """Return the embedding vector for ``text``, or ``None`` on failure.

    The first 8192 characters of ``text`` are sent, together with the model
    name ``snowflake-arctic-embed2``, as a JSON body to
    ``{OLLAMA_URL}/embeddings``. The vector is read from
    ``data[0]["embedding"]`` of the JSON reply.

    Any exception raised while calling the service or decoding its reply is
    reported on stderr and turned into a ``None`` return value, so callers
    only need to check for ``None``.
    """
    request_body = {
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=json.dumps(request_body).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as reply:
            raw_reply = reply.read()
        parsed = json.loads(raw_reply.decode())
        return parsed["data"][0]["embedding"]
    except Exception as e:
        # Best-effort: network, HTTP, JSON and shape errors all end up here.
        print(f"Error generating embedding: {e}", file=sys.stderr)
    return None
def extract_tags(text: str, date_str: str) -> List[str]:
    """Derive a de-duplicated, deterministically ordered tag list from ``text``.

    The result always starts with the fixed tags ``"conversation-turn"`` and
    ``"atomic-fact"`` followed by ``date_str``. After that comes one tag for
    every keyword of the mapping below that occurs as a case-insensitive
    substring of ``text``, in mapping order.

    Args:
        text: Free text to scan for keywords.
        date_str: Date string added verbatim as a tag.

    Returns:
        The tags without duplicates, in first-occurrence order.
    """
    # keyword searched for in the lower-cased text -> tag that is emitted
    keyword_to_tag = {
        "youtube": "youtube",
        "video": "video",
        "workflow": "workflow",
        "process": "process",
        "qdrant": "qdrant",
        "memory": "memory",
        "fact": "facts",
        "extract": "extraction",
        "config": "configuration",
        "setting": "settings",
        "rule": "rules",
        "decision": "decisions",
        "preference": "preferences",
        "hardware": "hardware",
        "security": "security",
        "research": "research",
        "step": "steps",
        "grok": "grok",
        "thumbnail": "thumbnail",
        "title": "title",
        "description": "description",
        "seo": "seo",
        "tags": "tags",
    }
    lowered = text.lower()
    tags = ["conversation-turn", "atomic-fact", date_str]
    tags.extend(
        tag for keyword, tag in keyword_to_tag.items() if keyword in lowered
    )
    # dict.fromkeys de-duplicates while keeping insertion order; a set would
    # return the tags in an order that varies between interpreter runs.
    return list(dict.fromkeys(tags))
def store_turn(
    speaker: str,
    message: str,
    date_str: str,
    tags: Optional[List[str]] = None,
    conversation_id: Optional[str] = None,
    turn_number: Optional[int] = None,
    importance: str = "medium"
) -> Optional[str]:
    """Embed one conversational turn and upsert it as a single Qdrant point.

    Args:
        speaker: Name placed in front of the stored text. ``"Rob"`` is
            recorded with ``source_type`` ``"user"``; any other speaker is
            recorded as ``"assistant"``.
        message: Text of the turn; it is embedded and stored.
        date_str: Date stored in the payload and used for tag extraction.
        tags: Tags to store. When ``None`` they are derived from ``message``
            with ``extract_tags``.
        conversation_id: Identifier linking related turns. A fresh UUID is
            generated when it is missing or empty.
        turn_number: Position of the turn in the conversation; a missing
            value is stored as ``0``.
        importance: Importance label stored in the payload.

    Returns:
        The UUID of the stored point, or ``None`` when the embedding could
        not be generated, the request failed, or Qdrant did not answer with
        status ``"ok"``. Failures are reported on stderr.
    """
    embedding = get_embedding(message)
    if embedding is None:
        return None

    point_id = str(uuid.uuid4())
    if tags is None:
        tags = extract_tags(message, date_str)

    # One timestamp for both fields so a brand-new record is self-consistent.
    now_iso = datetime.now().isoformat()
    payload = {
        "text": f"[{speaker}]: {message}",
        "date": date_str,
        "tags": tags,
        "importance": importance,
        "source": "conversation",
        "source_type": "user" if speaker == "Rob" else "assistant",
        "category": "Conversation",
        "confidence": "high",
        "verified": True,
        "created_at": now_iso,
        "access_count": 0,
        "last_accessed": now_iso,
        "conversation_id": conversation_id or str(uuid.uuid4()),
        "turn_number": turn_number or 0
    }
    upsert_data = {
        "points": [{
            "id": point_id,
            "vector": embedding,
            "payload": payload
        }]
    }
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps(upsert_data).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT"
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
        status = result.get("status")
    except Exception as e:
        # Best-effort boundary: network, HTTP and decoding errors end up here.
        print(f"Error storing turn: {e}", file=sys.stderr)
        return None

    if status != "ok":
        # Previously this case returned None without any diagnostic.
        print(
            f"Error storing turn: unexpected response status {status!r}",
            file=sys.stderr,
        )
        return None
    return point_id
def store_conversation_pair(
    user_message: str,
    ai_response: str,
    date_str: str,
    tags: List[str] = None,
    importance: str = "medium"
) -> tuple:
    """Store a user message and the AI reply as two linked turns.

    Both turns share one freshly generated conversation id. The user turn is
    stored first with speaker ``"Rob"`` and turn number 1, then the AI turn
    with speaker ``"Kimi"`` and turn number 2. The second turn is attempted
    even when the first one could not be stored.

    Returns:
        ``(user_id, ai_id)``; each element is whatever ``store_turn``
        returned for that turn (a point id, or ``None`` on failure).
    """
    link_id = str(uuid.uuid4())
    ordered_turns = (("Rob", user_message), ("Kimi", ai_response))

    stored_ids = []
    for position, (who, content) in enumerate(ordered_turns, start=1):
        stored_ids.append(
            store_turn(
                speaker=who,
                message=content,
                date_str=date_str,
                tags=tags,
                conversation_id=link_id,
                turn_number=position,
                importance=importance,
            )
        )
    return stored_ids[0], stored_ids[1]
def extract_facts_from_text(text: str, date_str: str) -> List[Dict[str, Any]]:
    """Turn a text block into a list of Qdrant point dicts, one per sentence.

    The text is cut at every ``". "`` and at every newline. Fragments shorter
    than 10 characters (after stripping) are ignored, as are fragments for
    which no embedding could be generated. The embedding is computed from
    the whole fragment, while the stored payload text is capped at 500
    characters. Fragments containing ``**`` are marked with importance
    ``"high"``, all others with ``"medium"``.

    Returns:
        Point dicts with ``id``, ``vector`` and ``payload`` keys. Nothing is
        uploaded here; that is left to the caller.
    """
    points = []
    for raw_piece in text.replace('. ', '.\n').split('\n'):
        candidate = raw_piece.strip()
        # Covers empty fragments as well as ones too short to be a fact.
        if len(candidate) < 10:
            continue

        vector = get_embedding(candidate)
        if vector is not None:
            fact_id = str(uuid.uuid4())
            fact_tags = extract_tags(candidate, date_str)
            level = "medium"
            if "**" in candidate:
                level = "high"
            fact_payload = {
                "text": candidate[:500],
                "date": date_str,
                "tags": fact_tags,
                "importance": level,
                "source": "fact-extraction",
                "source_type": "inferred",
                "category": "Extracted Fact",
                "confidence": "medium",
                "verified": False,
                "created_at": datetime.now().isoformat(),
                "access_count": 0,
                "last_accessed": datetime.now().isoformat(),
            }
            points.append(
                {"id": fact_id, "vector": vector, "payload": fact_payload}
            )
    return points
def main():
    """Command-line entry point.

    Two modes are supported:

    * Batch mode (``--file``): the file must contain a JSON array of objects
      with ``"user"`` and ``"ai"`` keys and an optional ``"tags"`` key. Each
      entry is stored as a conversation pair; tags given with ``--tags``
      take precedence over per-entry tags. Pairs that could not be stored
      completely are counted and reported on stderr.
    * Single-pair mode (two positional arguments): stores one pair and, with
      ``--extract-facts``, additionally uploads facts extracted from the AI
      response. Exits with status 1 when the pair could not be stored.

    Without usable arguments the help text is printed and the process exits
    with status 1.
    """
    parser = argparse.ArgumentParser(description="Store conversational turns to Qdrant")
    parser.add_argument("user_message", nargs="?", help="User's message/query")
    parser.add_argument("ai_response", nargs="?", help="AI's response")
    parser.add_argument("--date", default=datetime.now().strftime("%Y-%m-%d"), help="Date (YYYY-MM-DD)")
    parser.add_argument("--tags", help="Comma-separated tags")
    parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])
    parser.add_argument("--file", help="JSON file with conversation array")
    parser.add_argument("--extract-facts", action="store_true", help="Also extract atomic facts from response")
    args = parser.parse_args()

    # Accept "a, b" style input: strip whitespace and drop empty entries.
    # An effectively empty list falls back to None so that tags are still
    # derived automatically downstream.
    tags = None
    if args.tags:
        tags = [t.strip() for t in args.tags.split(",") if t.strip()] or None

    if args.file:
        # Batch mode from JSON file. JSON text is UTF-8, so do not rely on
        # the platform's default encoding.
        with open(args.file, 'r', encoding="utf-8") as f:
            conversations = json.load(f)
        total = 0
        failed = 0
        for conv in conversations:
            user_id, ai_id = store_conversation_pair(
                conv["user"],
                conv["ai"],
                args.date,
                tags or conv.get("tags"),
                args.importance
            )
            if user_id and ai_id:
                total += 2
            else:
                failed += 1
        print(f"✅ Stored {total} conversation turns")
        if failed:
            # Make partial failures visible instead of silently skipping them.
            print(
                f"Warning: {failed} conversation pair(s) could not be fully stored",
                file=sys.stderr,
            )
    elif args.user_message and args.ai_response:
        # Single pair mode
        user_id, ai_id = store_conversation_pair(
            args.user_message,
            args.ai_response,
            args.date,
            tags,
            args.importance
        )
        if not (user_id and ai_id):
            print("❌ Failed to store conversation")
            sys.exit(1)

        print("✅ Stored conversation pair")
        print(f" User turn: {user_id[:8]}...")
        print(f" AI turn: {ai_id[:8]}...")
        if args.extract_facts:
            facts = extract_facts_from_text(args.ai_response, args.date)
            if facts:
                # Upload all extracted facts in one request.
                upsert_data = {"points": facts}
                req = urllib.request.Request(
                    f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
                    data=json.dumps(upsert_data).encode(),
                    headers={"Content-Type": "application/json"},
                    method="PUT"
                )
                try:
                    with urllib.request.urlopen(req, timeout=30):
                        print(f" Extracted {len(facts)} additional facts")
                except Exception as e:
                    # The pair itself is already stored, so only warn.
                    print(f" Warning: Could not store extracted facts: {e}")
    else:
        parser.print_help()
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()