forked from SpeedyFoxAi/jarvis-memory
237 lines
7.8 KiB
Python
Executable File
237 lines
7.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Mem0-Style Conversation Retrieval - User-centric memory search

Retrieves memories by USER, not by session/chat.
Cross-conversation search across all of Rob's memories.

Usage:
    # Search user's memories across all conversations
    python3 scripts/get_conversation_context.py --user-id "rob" "what was the decision about Qdrant?"

    # Get specific conversation
    python3 scripts/get_conversation_context.py --user-id "rob" --conversation-id <id>

    # Get all conversations for user
    python3 scripts/get_conversation_context.py --user-id "rob" --limit 50

Mem0-style: Memories belong to USER, not to session.
|
|
"""
|
|
|
|
import argparse
import json
import os
import sys
import urllib.request
from datetime import datetime
from typing import List, Optional, Dict, Any
|
|
|
|
# Service endpoints. Each value can be overridden through an environment
# variable so the script can be pointed at another deployment without editing
# the source; the defaults are the original hard-coded values. An unset or
# empty variable falls back to the default.

# Base URL of the Qdrant REST API (used for /collections/<name>/points/...).
# A trailing slash is stripped because request paths are appended with "/".
QDRANT_URL = (os.environ.get("QDRANT_URL") or "http://10.0.0.40:6333").rstrip("/")

# Name of the Qdrant collection that is searched and scrolled.
COLLECTION_NAME = os.environ.get("QDRANT_COLLECTION") or "kimi_memories"

# Base URL of the embeddings API ("/embeddings" is appended to it).
OLLAMA_URL = (os.environ.get("OLLAMA_URL") or "http://10.0.0.10:11434/v1").rstrip("/")
|
|
|
|
def get_embedding(text: str) -> Optional[List[float]]:
    """Generate an embedding for *text* using snowflake-arctic-embed2.

    The text is truncated to its first 8192 characters and sent as JSON to
    ``{OLLAMA_URL}/embeddings`` with a 30 second timeout.

    Args:
        text: Text to embed.

    Returns:
        The embedding vector read from ``data[0].embedding`` of the response,
        or ``None`` when *text* is empty/whitespace-only, the request fails,
        or the response does not have the expected shape. Failures are
        reported on stderr rather than raised.
    """
    # Nothing meaningful to embed: skip the network round trip entirely.
    if not text or not text.strip():
        return None

    data = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],
    }).encode()

    req = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=data,
        headers={"Content-Type": "application/json"},
    )

    # Best-effort boundary: any transport, decoding or response-shape problem
    # is logged and turned into None so callers can degrade gracefully.
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
        return result["data"][0]["embedding"]
    except Exception as e:
        print(f"[Retrieval] Embedding error: {e}", file=sys.stderr)
        return None
|
|
|
|
def search_user_memories(user_id: str, query: str, limit: int = 10) -> List[Dict]:
    """
    MEM0-STYLE: Search memories for a specific user across all conversations.
    NOT session-based - user-centric.

    Embeds *query* with ``get_embedding`` and runs a vector search against
    ``COLLECTION_NAME``, restricted to points whose payload has
    ``user_id == user_id`` and ``source_type == "system"`` (the summaries,
    per the original author's note).

    Args:
        user_id: Owner of the memories to search.
        query: Free-text search query.
        limit: Maximum number of hits to return.

    Returns:
        The list under ``"result"`` in the Qdrant response (payloads
        included). An empty list is returned when *query* is blank, *limit*
        is not positive, the embedding could not be generated, or the search
        request fails (the failure is reported on stderr).
    """
    # A blank query or a non-positive limit cannot produce hits; return early
    # instead of spending an embedding call and a search request on it.
    if limit < 1 or not query or not query.strip():
        return []

    embedding = get_embedding(query)
    if embedding is None:
        return []

    # Search with user_id filter (MEM0: memories belong to user)
    search_data = json.dumps({
        "vector": embedding,
        "limit": limit,
        "with_payload": True,
        "filter": {
            "must": [
                {"key": "user_id", "match": {"value": user_id}},
                {"key": "source_type", "match": {"value": "system"}},  # Search summaries
            ]
        },
    }).encode()

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/search",
        data=search_data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
        # "result" may be absent or null; always hand back a list.
        return result.get("result") or []
    except Exception as e:
        print(f"[Retrieval] Search error: {e}", file=sys.stderr)
        return []
|
|
|
|
def get_user_conversations(user_id: str, limit: int = 100) -> List[Dict]:
    """Get conversations for a user (Mem0-style).

    Issues a single scroll request against ``COLLECTION_NAME`` for points
    whose payload has ``user_id == user_id`` and ``source_type == "system"``
    (the summaries, per the original author's note).

    Args:
        user_id: Owner of the memories to fetch.
        limit: Maximum number of points requested from the scroll endpoint.

    Returns:
        The list under ``result.points`` in the Qdrant response (payloads
        included). An empty list is returned when *limit* is not positive or
        the request fails (the failure is reported on stderr).
    """
    # A non-positive page size cannot return points; skip the request.
    if limit < 1:
        return []

    scroll_data = json.dumps({
        "limit": limit,
        "with_payload": True,
        "filter": {
            "must": [
                {"key": "user_id", "match": {"value": user_id}},
                {"key": "source_type", "match": {"value": "system"}},  # Get summaries
            ]
        },
    }).encode()

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
        data=scroll_data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
        # Tolerate a missing or null "result"/"points" and always return a list.
        return (result.get("result") or {}).get("points") or []
    except Exception as e:
        print(f"[Retrieval] Fetch error: {e}", file=sys.stderr)
        return []
|
|
|
|
def get_conversation_by_id(user_id: str, conversation_id: str, limit: int = 100) -> List[Dict]:
    """Get full conversation by ID (with user verification).

    Issues a single scroll request against ``COLLECTION_NAME`` for points
    whose payload matches both ``user_id`` and ``conversation_id``, so a
    conversation is only returned to the user it is stored under.

    Args:
        user_id: Owner the conversation must belong to.
        conversation_id: Identifier of the conversation to fetch.
        limit: Maximum number of points requested from the scroll endpoint.

    Returns:
        The list under ``result.points`` in the Qdrant response (payloads
        included). An empty list is returned when *limit* is not positive or
        the request fails (the failure is reported on stderr).
    """
    # A non-positive page size cannot return points; skip the request.
    if limit < 1:
        return []

    scroll_data = json.dumps({
        "limit": limit,
        "with_payload": True,
        "filter": {
            "must": [
                {"key": "user_id", "match": {"value": user_id}},
                {"key": "conversation_id", "match": {"value": conversation_id}},
            ]
        },
    }).encode()

    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
        data=scroll_data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
        # Tolerate a missing or null "result"/"points" and always return a list.
        return (result.get("result") or {}).get("points") or []
    except Exception as e:
        print(f"[Retrieval] Fetch error: {e}", file=sys.stderr)
        return []
|
|
|
|
def format_conversation(points: List[Dict]) -> str:
    """Format conversation into readable transcript.

    Points are ordered by ``payload["turn_number"]``; within a turn, points
    whose ``source_type`` is ``"user"`` or ``"assistant"`` come before any
    others. Points whose payload has ``source == "conversation_summary"`` are
    left out. Each time the turn number changes, a
    ``--- Turn <n> [<date>] ---`` header is emitted, followed by the text of
    every point in that turn.

    Args:
        points: Point dicts, each expected to carry a ``"payload"`` dict.

    Returns:
        The transcript as a single newline-joined string; ``""`` when there
        is nothing to show.
    """

    def turn_of(payload: Dict) -> Any:
        # A missing or explicitly null turn_number is treated as turn 0 so
        # that sorting never has to compare None with an int.
        turn = payload.get("turn_number")
        return 0 if turn is None else turn

    def sort_key(point: Dict):
        payload = point.get("payload") or {}
        is_dialogue = payload.get("source_type", "") in ("user", "assistant")
        return (turn_of(payload), 0 if is_dialogue else 1)

    output = []
    # Sentinel rather than 0: the very first turn must get a header even when
    # it is numbered 0.
    current_turn = None

    for point in sorted(points, key=sort_key):
        payload = point.get("payload") or {}

        if payload.get("source") == "conversation_summary":
            continue

        turn = turn_of(payload)
        if turn != current_turn:
            date = payload.get("date", "unknown")
            output.append(f"\n--- Turn {turn} [{date}] ---")
            current_turn = turn

        # A null text would make str.join raise; fall back to an empty line.
        output.append(payload.get("text") or "")

    return "\n".join(output)
|
|
|
|
def main():
    """Command-line entry point for user-centric memory retrieval.

    Depending on the arguments, one of three lookups is performed for the
    required ``--user-id``:

    * ``--conversation-id`` given: fetch that conversation (requesting up to
      ``3 * --limit`` points).
    * positional ``query`` given: vector search over the user's memories.
    * neither: list the user's stored memories.

    Results are printed either as JSON (``--format json``) or as transcripts
    grouped by ``conversation_id``. The process exits with status 1 when the
    user id is empty or nothing was found.
    """
    parser = argparse.ArgumentParser(
        description="Mem0-style conversation retrieval (user-centric)"
    )
    parser.add_argument("query", nargs="?", help="Search query")
    parser.add_argument("--user-id", required=True,
                        help="REQUIRED: User ID (e.g., 'rob')")
    parser.add_argument("--conversation-id",
                        help="Get specific conversation")
    parser.add_argument("--limit", type=int, default=10,
                        help="Max results")
    parser.add_argument("--format", choices=["transcript", "json"],
                        default="transcript")

    args = parser.parse_args()

    # argparse guarantees the option is present, but not that it is non-empty
    # (e.g. --user-id "").
    if not args.user_id:
        print("❌ --user-id is required for Mem0-style retrieval", file=sys.stderr)
        sys.exit(1)

    # In JSON mode stdout must carry nothing but the JSON document so the
    # output can be piped into a parser; status lines go to stderr instead.
    # Transcript mode keeps printing them on stdout as before.
    status = sys.stderr if args.format == "json" else sys.stdout

    if args.conversation_id:
        print(f"🔍 Fetching conversation for user '{args.user_id}': {args.conversation_id}",
              file=status)
        points = get_conversation_by_id(args.user_id, args.conversation_id, args.limit * 3)
    elif args.query:
        print(f"🔍 Searching memories for user '{args.user_id}': {args.query}", file=status)
        points = search_user_memories(args.user_id, args.query, args.limit)
    else:
        print(f"🔍 Fetching all memories for user '{args.user_id}'", file=status)
        points = get_user_conversations(args.user_id, args.limit)

    if not points:
        print(f"❌ No memories found for user '{args.user_id}'", file=status)
        sys.exit(1)

    if args.format == "json":
        print(json.dumps(points, indent=2))
        return

    # Group by conversation_id (insertion order of first appearance is kept).
    conversations = {}
    for p in points:
        convo_id = (p.get("payload") or {}).get("conversation_id")
        conversations.setdefault(convo_id, []).append(p)

    for i, (convo_id, convo_points) in enumerate(conversations.items(), 1):
        print(f"\n{'='*60}")
        print(f"📜 Conversation {i}: {convo_id}")
        print(f"{'='*60}")
        print(format_conversation(convo_points))


if __name__ == "__main__":
    main()
|