Initial commit: Jarvis Memory system
This commit is contained in:
236
skills/qdrant-memory/scripts/get_conversation_context.py
Executable file
236
skills/qdrant-memory/scripts/get_conversation_context.py
Executable file
@@ -0,0 +1,236 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Mem0-Style Conversation Retrieval - User-centric memory search
|
||||
|
||||
Retrieves memories by USER, not by session/chat.
|
||||
Cross-conversation search across all of Rob's memories.
|
||||
|
||||
Usage:
|
||||
# Search user's memories across all conversations
|
||||
python3 scripts/get_conversation_context.py --user-id "rob" "what was the decision about Qdrant?"
|
||||
|
||||
# Get specific conversation
|
||||
python3 scripts/get_conversation_context.py --user-id "rob" --conversation-id <id>
|
||||
|
||||
# Get all conversations for user
|
||||
python3 scripts/get_conversation_context.py --user-id "rob" --limit 50
|
||||
|
||||
Mem0-style: Memories belong to USER, not to session.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "kimi_memories"
|
||||
OLLAMA_URL = "http://10.0.0.10:11434/v1"
|
||||
|
||||
def get_embedding(text: str) -> Optional[List[float]]:
|
||||
"""Generate embedding using snowflake-arctic-embed2"""
|
||||
data = json.dumps({
|
||||
"model": "snowflake-arctic-embed2",
|
||||
"input": text[:8192]
|
||||
}).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
f"{OLLAMA_URL}/embeddings",
|
||||
data=data,
|
||||
headers={"Content-Type": "application/json"}
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=30) as response:
|
||||
result = json.loads(response.read().decode())
|
||||
return result["data"][0]["embedding"]
|
||||
except Exception as e:
|
||||
print(f"[Retrieval] Embedding error: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
def search_user_memories(user_id: str, query: str, limit: int = 10) -> List[Dict]:
|
||||
"""
|
||||
MEM0-STYLE: Search memories for a specific user across all conversations.
|
||||
NOT session-based - user-centric.
|
||||
"""
|
||||
embedding = get_embedding(query)
|
||||
if embedding is None:
|
||||
return []
|
||||
|
||||
# Search with user_id filter (MEM0: memories belong to user)
|
||||
search_data = json.dumps({
|
||||
"vector": embedding,
|
||||
"limit": limit,
|
||||
"with_payload": True,
|
||||
"filter": {
|
||||
"must": [
|
||||
{"key": "user_id", "match": {"value": user_id}},
|
||||
{"key": "source_type", "match": {"value": "system"}} # Search summaries
|
||||
]
|
||||
}
|
||||
}).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/search",
|
||||
data=search_data,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST"
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=10) as response:
|
||||
result = json.loads(response.read().decode())
|
||||
return result.get("result", [])
|
||||
except Exception as e:
|
||||
print(f"[Retrieval] Search error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def get_user_conversations(user_id: str, limit: int = 100) -> List[Dict]:
|
||||
"""Get all conversations for a user (Mem0-style)"""
|
||||
|
||||
scroll_data = json.dumps({
|
||||
"limit": limit,
|
||||
"with_payload": True,
|
||||
"filter": {
|
||||
"must": [
|
||||
{"key": "user_id", "match": {"value": user_id}},
|
||||
{"key": "source_type", "match": {"value": "system"}} # Get summaries
|
||||
]
|
||||
}
|
||||
}).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
|
||||
data=scroll_data,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST"
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=10) as response:
|
||||
result = json.loads(response.read().decode())
|
||||
return result.get("result", {}).get("points", [])
|
||||
except Exception as e:
|
||||
print(f"[Retrieval] Fetch error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def get_conversation_by_id(user_id: str, conversation_id: str, limit: int = 100) -> List[Dict]:
|
||||
"""Get full conversation by ID (with user verification)"""
|
||||
|
||||
scroll_data = json.dumps({
|
||||
"limit": limit,
|
||||
"with_payload": True,
|
||||
"filter": {
|
||||
"must": [
|
||||
{"key": "user_id", "match": {"value": user_id}},
|
||||
{"key": "conversation_id", "match": {"value": conversation_id}}
|
||||
]
|
||||
}
|
||||
}).encode()
|
||||
|
||||
req = urllib.request.Request(
|
||||
f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
|
||||
data=scroll_data,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST"
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=10) as response:
|
||||
result = json.loads(response.read().decode())
|
||||
return result.get("result", {}).get("points", [])
|
||||
except Exception as e:
|
||||
print(f"[Retrieval] Fetch error: {e}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
def format_conversation(points: List[Dict]) -> str:
|
||||
"""Format conversation into readable transcript"""
|
||||
|
||||
def sort_key(p):
|
||||
turn = p.get("payload", {}).get("turn_number", 0)
|
||||
source = p.get("payload", {}).get("source_type", "")
|
||||
return (turn, 0 if source in ["user", "assistant"] else 1)
|
||||
|
||||
sorted_points = sorted(points, key=sort_key)
|
||||
|
||||
output = []
|
||||
current_turn = 0
|
||||
|
||||
for point in sorted_points:
|
||||
payload = point.get("payload", {})
|
||||
text = payload.get("text", "")
|
||||
source = payload.get("source_type", "unknown")
|
||||
turn = payload.get("turn_number", 0)
|
||||
date = payload.get("date", "unknown")
|
||||
user = payload.get("user_id", "unknown")
|
||||
|
||||
if payload.get("source") == "conversation_summary":
|
||||
continue
|
||||
|
||||
if turn != current_turn:
|
||||
output.append(f"\n--- Turn {turn} [{date}] ---")
|
||||
current_turn = turn
|
||||
|
||||
output.append(text)
|
||||
|
||||
return "\n".join(output)
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Mem0-style conversation retrieval (user-centric)"
|
||||
)
|
||||
parser.add_argument("query", nargs="?", help="Search query")
|
||||
parser.add_argument("--user-id", required=True,
|
||||
help="REQUIRED: User ID (e.g., 'rob')")
|
||||
parser.add_argument("--conversation-id",
|
||||
help="Get specific conversation")
|
||||
parser.add_argument("--limit", type=int, default=10,
|
||||
help="Max results")
|
||||
parser.add_argument("--format", choices=["transcript", "json"],
|
||||
default="transcript")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.user_id:
|
||||
print("❌ --user-id is required for Mem0-style retrieval", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
points = []
|
||||
|
||||
if args.conversation_id:
|
||||
print(f"🔍 Fetching conversation for user '{args.user_id}': {args.conversation_id}")
|
||||
points = get_conversation_by_id(args.user_id, args.conversation_id, args.limit * 3)
|
||||
|
||||
elif args.query:
|
||||
print(f"🔍 Searching memories for user '{args.user_id}': {args.query}")
|
||||
points = search_user_memories(args.user_id, args.query, args.limit)
|
||||
|
||||
else:
|
||||
print(f"🔍 Fetching all memories for user '{args.user_id}'")
|
||||
points = get_user_conversations(args.user_id, args.limit)
|
||||
|
||||
if not points:
|
||||
print(f"❌ No memories found for user '{args.user_id}'")
|
||||
sys.exit(1)
|
||||
|
||||
if args.format == "json":
|
||||
print(json.dumps(points, indent=2))
|
||||
else:
|
||||
# Group by conversation_id
|
||||
conversations = {}
|
||||
for p in points:
|
||||
convo_id = p.get("payload", {}).get("conversation_id")
|
||||
if convo_id not in conversations:
|
||||
conversations[convo_id] = []
|
||||
conversations[convo_id].append(p)
|
||||
|
||||
for i, (convo_id, convo_points) in enumerate(conversations.items(), 1):
|
||||
print(f"\n{'='*60}")
|
||||
print(f"📜 Conversation {i}: {convo_id}")
|
||||
print(f"{'='*60}")
|
||||
print(format_conversation(convo_points))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user