Initial commit: Jarvis Memory system
This commit is contained in:
303
skills/qdrant-memory/scripts/store_conversation.py
Executable file
303
skills/qdrant-memory/scripts/store_conversation.py
Executable file
@@ -0,0 +1,303 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Conversation Memory Capture - Store conversational turns to Qdrant
|
||||
|
||||
This script stores the full conversational context (user messages + AI responses)
|
||||
as atomic facts in Qdrant, not just summaries written to daily logs.
|
||||
|
||||
Usage:
|
||||
store_conversation.py "User message" "AI response" --date 2026-02-15 --tags "workflow"
|
||||
store_conversation.py --file conversation.json # Batch mode
|
||||
|
||||
Features:
|
||||
- Stores both user queries and AI responses
|
||||
- Generates embeddings for semantic search
|
||||
- Links related turns with conversation IDs
|
||||
- Extracts facts from responses automatically
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "kimi_memories"
|
||||
OLLAMA_URL = "http://localhost:11434/v1"
|
||||
|
||||
|
||||
def get_embedding(text: str) -> Optional[List[float]]:
|
||||
"""Generate embedding using snowflake-arctic-embed2"""
|
||||
data = json.dumps({
|
||||
"model": "snowflake-arctic-embed2",
|
||||
"input": text[:8192]
|
||||
}).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
f"{OLLAMA_URL}/embeddings",
|
||||
data=data,
|
||||
headers={"Content-Type": "application/json"}
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=30) as response:
|
||||
result = json.loads(response.read().decode())
|
||||
return result["data"][0]["embedding"]
|
||||
except Exception as e:
|
||||
print(f"Error generating embedding: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def extract_tags(text: str, date_str: str) -> List[str]:
|
||||
"""Extract relevant tags from text"""
|
||||
tags = ["conversation-turn", "atomic-fact", date_str]
|
||||
|
||||
text_lower = text.lower()
|
||||
tag_mappings = {
|
||||
"youtube": "youtube",
|
||||
"video": "video",
|
||||
"workflow": "workflow",
|
||||
"process": "process",
|
||||
"qdrant": "qdrant",
|
||||
"memory": "memory",
|
||||
"fact": "facts",
|
||||
"extract": "extraction",
|
||||
"config": "configuration",
|
||||
"setting": "settings",
|
||||
"rule": "rules",
|
||||
"decision": "decisions",
|
||||
"preference": "preferences",
|
||||
"hardware": "hardware",
|
||||
"security": "security",
|
||||
"research": "research",
|
||||
"step": "steps",
|
||||
"grok": "grok",
|
||||
"thumbnail": "thumbnail",
|
||||
"title": "title",
|
||||
"description": "description",
|
||||
"seo": "seo",
|
||||
"tags": "tags",
|
||||
}
|
||||
|
||||
for keyword, tag in tag_mappings.items():
|
||||
if keyword in text_lower:
|
||||
tags.append(tag)
|
||||
|
||||
return list(set(tags))
|
||||
|
||||
|
||||
def store_turn(
|
||||
speaker: str,
|
||||
message: str,
|
||||
date_str: str,
|
||||
tags: List[str] = None,
|
||||
conversation_id: str = None,
|
||||
turn_number: int = None,
|
||||
importance: str = "medium"
|
||||
) -> Optional[str]:
|
||||
"""Store a single conversational turn"""
|
||||
|
||||
embedding = get_embedding(message)
|
||||
if embedding is None:
|
||||
return None
|
||||
|
||||
point_id = str(uuid.uuid4())
|
||||
|
||||
if tags is None:
|
||||
tags = extract_tags(message, date_str)
|
||||
|
||||
payload = {
|
||||
"text": f"[{speaker}]: {message}",
|
||||
"date": date_str,
|
||||
"tags": tags,
|
||||
"importance": importance,
|
||||
"source": "conversation",
|
||||
"source_type": "user" if speaker == "Rob" else "assistant",
|
||||
"category": "Conversation",
|
||||
"confidence": "high",
|
||||
"verified": True,
|
||||
"created_at": datetime.now().isoformat(),
|
||||
"access_count": 0,
|
||||
"last_accessed": datetime.now().isoformat(),
|
||||
"conversation_id": conversation_id or str(uuid.uuid4()),
|
||||
"turn_number": turn_number or 0
|
||||
}
|
||||
|
||||
upsert_data = {
|
||||
"points": [{
|
||||
"id": point_id,
|
||||
"vector": embedding,
|
||||
"payload": payload
|
||||
}]
|
||||
}
|
||||
|
||||
req = urllib.request.Request(
|
||||
f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
|
||||
data=json.dumps(upsert_data).encode(),
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="PUT"
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=10) as response:
|
||||
result = json.loads(response.read().decode())
|
||||
if result.get("status") == "ok":
|
||||
return point_id
|
||||
except Exception as e:
|
||||
print(f"Error storing turn: {e}", file=sys.stderr)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def store_conversation_pair(
|
||||
user_message: str,
|
||||
ai_response: str,
|
||||
date_str: str,
|
||||
tags: List[str] = None,
|
||||
importance: str = "medium"
|
||||
) -> tuple:
|
||||
"""Store both user query and AI response as linked turns"""
|
||||
|
||||
conversation_id = str(uuid.uuid4())
|
||||
|
||||
user_id = store_turn(
|
||||
speaker="Rob",
|
||||
message=user_message,
|
||||
date_str=date_str,
|
||||
tags=tags,
|
||||
conversation_id=conversation_id,
|
||||
turn_number=1,
|
||||
importance=importance
|
||||
)
|
||||
|
||||
ai_id = store_turn(
|
||||
speaker="Kimi",
|
||||
message=ai_response,
|
||||
date_str=date_str,
|
||||
tags=tags,
|
||||
conversation_id=conversation_id,
|
||||
turn_number=2,
|
||||
importance=importance
|
||||
)
|
||||
|
||||
return user_id, ai_id
|
||||
|
||||
|
||||
def extract_facts_from_text(text: str, date_str: str) -> List[Dict[str, Any]]:
|
||||
"""Extract atomic facts from a text block"""
|
||||
facts = []
|
||||
|
||||
# Split into sentences
|
||||
sentences = [s.strip() for s in text.replace('. ', '.\n').split('\n') if s.strip()]
|
||||
|
||||
for sentence in sentences:
|
||||
if len(sentence) < 10:
|
||||
continue
|
||||
|
||||
embedding = get_embedding(sentence)
|
||||
if embedding is None:
|
||||
continue
|
||||
|
||||
point_id = str(uuid.uuid4())
|
||||
|
||||
facts.append({
|
||||
"id": point_id,
|
||||
"vector": embedding,
|
||||
"payload": {
|
||||
"text": sentence[:500],
|
||||
"date": date_str,
|
||||
"tags": extract_tags(sentence, date_str),
|
||||
"importance": "high" if "**" in sentence else "medium",
|
||||
"source": "fact-extraction",
|
||||
"source_type": "inferred",
|
||||
"category": "Extracted Fact",
|
||||
"confidence": "medium",
|
||||
"verified": False,
|
||||
"created_at": datetime.now().isoformat(),
|
||||
"access_count": 0,
|
||||
"last_accessed": datetime.now().isoformat()
|
||||
}
|
||||
})
|
||||
|
||||
return facts
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Store conversational turns to Qdrant")
|
||||
parser.add_argument("user_message", nargs="?", help="User's message/query")
|
||||
parser.add_argument("ai_response", nargs="?", help="AI's response")
|
||||
parser.add_argument("--date", default=datetime.now().strftime("%Y-%m-%d"), help="Date (YYYY-MM-DD)")
|
||||
parser.add_argument("--tags", help="Comma-separated tags")
|
||||
parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])
|
||||
parser.add_argument("--file", help="JSON file with conversation array")
|
||||
parser.add_argument("--extract-facts", action="store_true", help="Also extract atomic facts from response")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
tags = args.tags.split(",") if args.tags else None
|
||||
|
||||
if args.file:
|
||||
# Batch mode from JSON file
|
||||
with open(args.file, 'r') as f:
|
||||
conversations = json.load(f)
|
||||
|
||||
total = 0
|
||||
for conv in conversations:
|
||||
user_id, ai_id = store_conversation_pair(
|
||||
conv["user"],
|
||||
conv["ai"],
|
||||
args.date,
|
||||
tags or conv.get("tags"),
|
||||
args.importance
|
||||
)
|
||||
if user_id and ai_id:
|
||||
total += 2
|
||||
|
||||
print(f"✅ Stored {total} conversation turns")
|
||||
|
||||
elif args.user_message and args.ai_response:
|
||||
# Single pair mode
|
||||
user_id, ai_id = store_conversation_pair(
|
||||
args.user_message,
|
||||
args.ai_response,
|
||||
args.date,
|
||||
tags,
|
||||
args.importance
|
||||
)
|
||||
|
||||
if user_id and ai_id:
|
||||
print(f"✅ Stored conversation pair")
|
||||
print(f" User turn: {user_id[:8]}...")
|
||||
print(f" AI turn: {ai_id[:8]}...")
|
||||
|
||||
if args.extract_facts:
|
||||
facts = extract_facts_from_text(args.ai_response, args.date)
|
||||
if facts:
|
||||
# Upload facts
|
||||
upsert_data = {"points": facts}
|
||||
req = urllib.request.Request(
|
||||
f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
|
||||
data=json.dumps(upsert_data).encode(),
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="PUT"
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=30) as response:
|
||||
print(f" Extracted {len(facts)} additional facts")
|
||||
except Exception as e:
|
||||
print(f" Warning: Could not store extracted facts: {e}")
|
||||
else:
|
||||
print("❌ Failed to store conversation")
|
||||
sys.exit(1)
|
||||
else:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user