Files
jarvis-memory/skills/qdrant-memory/scripts/hybrid_search.py

136 lines
4.7 KiB
Python
Raw Normal View History

2026-02-23 12:13:04 -06:00
#!/usr/bin/env python3
"""
Hybrid search: Search both file-based memory and Qdrant vectors
Usage: hybrid_search.py "Query text" [--file-limit 3] [--vector-limit 3] [--json]
"""
import argparse
import json
import os
import subprocess
import sys
import re
from datetime import datetime, timedelta
# Root directory of the workspace; the Qdrant helper script path is built from it.
WORKSPACE = "/root/.openclaw/workspace"
# Directory holding the dated memory notes (one "YYYY-MM-DD.md" file per day).
MEMORY_DIR = WORKSPACE + "/memory"
def search_files(query, limit=3, memory_dir=None, *, lookback_days=30, max_files=7):
    """Search recent dated memory files for keyword matches.

    The query is lower-cased and split on whitespace into keywords. Every line
    of each scanned file that contains at least one keyword (substring match)
    becomes a candidate; its score is the fraction of keywords found on that
    line. Each result carries the matching line plus up to three lines of
    context before and after it.

    Args:
        query: Free-text query.
        limit: Maximum number of results to return. Values <= 0 yield [].
        memory_dir: Directory containing ``YYYY-MM-DD.md`` files. Defaults to
            the module-level ``MEMORY_DIR``.
        lookback_days: How many days back from today to look for files.
        max_files: Maximum number of (most recent) existing files to scan.

    Returns:
        A list of at most ``limit`` dicts with the keys ``source``, ``date``,
        ``score``, ``text`` and ``type`` (always ``"file"``), sorted by score
        in descending order. Ties keep most-recent-file-first order.
    """
    if memory_dir is None:
        memory_dir = MEMORY_DIR

    keywords = set(query.lower().split())
    if limit <= 0 or not keywords:
        return []

    # Collect the dated files that exist, newest first.
    today = datetime.now()
    recent_files = []
    for days_ago in range(lookback_days):
        date_str = (today - timedelta(days=days_ago)).strftime("%Y-%m-%d")
        filepath = f"{memory_dir}/{date_str}.md"
        if os.path.isfile(filepath):
            recent_files.append((date_str, filepath))
    recent_files = recent_files[:max(0, max_files)]

    results = []
    for date_str, filepath in recent_files:
        try:
            # Explicit encoding so results do not depend on the locale;
            # errors="replace" keeps one stray byte from hiding a whole file.
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                lines = f.read().split("\n")
        except OSError as exc:
            # Best effort: report the unreadable file and keep searching.
            print(f"Skipping unreadable memory file {filepath}: {exc}", file=sys.stderr)
            continue

        # Score every matching line first. Ranking happens before any
        # truncation so a strong match late in a file cannot be crowded out
        # by weaker matches that merely appear earlier.
        scored = []
        for index, line in enumerate(lines):
            line_lower = line.lower()
            hits = sum(1 for kw in keywords if kw in line_lower)
            if hits:
                scored.append((hits / len(keywords), index))
        scored.sort(key=lambda item: (-item[0], item[1]))

        # Dedupe: a matching line already displayed inside a kept snippet
        # does not produce another, near-identical snippet.
        shown = set()
        for score, index in scored:
            if index in shown:
                continue
            start = max(0, index - 3)
            end = min(len(lines), index + 4)
            shown.update(range(start, end))
            results.append({
                "source": f"file:{filepath}",
                "date": date_str,
                "score": score,
                "text": "\n".join(lines[start:end]).strip(),
                "type": "file",
            })

    # Stable sort: equal scores keep newest-file-first order.
    results.sort(key=lambda item: item["score"], reverse=True)
    return results[:limit]
def search_qdrant(query, limit=3, script_path=None):
    """Search Qdrant by running the search_memories helper script.

    The helper is invoked as ``<python> <script> <query> --limit <limit> --json``
    and is expected to print a JSON list of objects on stdout. Each object is
    tagged with ``"type": "vector"`` and ``"source": "qdrant"``.

    Any failure (helper cannot be started, times out after 60 seconds, exits
    non-zero, or prints something that is not a JSON list) is reported on
    stderr and results in an empty list, so callers can fall back to
    file-based results only.

    Args:
        query: Free-text query passed to the helper.
        limit: Maximum number of results requested from the helper.
        script_path: Path of the helper script. Defaults to
            ``search_memories.py`` under the workspace's qdrant-memory skill.

    Returns:
        A list of result dicts, or [] on any failure.
    """
    if script_path is None:
        script_path = f"{WORKSPACE}/skills/qdrant-memory/scripts/search_memories.py"

    def _fail(reason):
        print(f"Qdrant search failed (falling back to files only): {reason}", file=sys.stderr)
        return []

    # Run the helper with the interpreter executing this script so both see
    # the same installed packages; fall back to PATH lookup if it is unknown.
    command = [
        sys.executable or "python3",
        script_path, query, "--limit", str(limit), "--json",
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, timeout=60)
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        # Covers a missing interpreter, undecodable output, and the timeout.
        return _fail(exc)

    if result.returncode != 0:
        return _fail(result.stderr.strip() or f"exit code {result.returncode}")

    try:
        memories = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        return _fail(f"invalid JSON from helper: {exc}")

    if not isinstance(memories, list):
        return _fail("helper did not return a JSON list")

    tagged = []
    for memory in memories:
        # Callers read results with dict access; ignore anything else.
        if isinstance(memory, dict):
            memory["type"] = "vector"
            memory["source"] = "qdrant"
            tagged.append(memory)
    return tagged
def _print_file_results(file_results):
    """Print the file-based hits in the human-readable layout."""
    print(f"📁 File-based results ({len(file_results)}):")
    print("-" * 50)
    for r in file_results:
        print(f"[{r['date']}] Score: {r['score']:.2f}")
        print(r['text'][:300])
        if len(r['text']) > 300:
            print("...")
        print()


def _print_vector_results(vector_results):
    """Print the Qdrant hits in the human-readable layout.

    Fields come from an external helper's JSON, so missing or oddly typed
    values fall back to defaults instead of raising while formatting.
    """
    print(f"\n🔍 Vector (Qdrant) results ({len(vector_results)}):")
    print("-" * 50)
    for r in vector_results:
        score = r.get('score')
        # bool is an int subclass; treat it like any other non-numeric value.
        if isinstance(score, bool) or not isinstance(score, (int, float)):
            score = 0
        print(f"[{r.get('date', 'unknown')}] Score: {score:.3f} [{r.get('importance', 'medium')}]")
        text = str(r.get('text') or '')
        print(text[:300])
        if len(text) > 300:
            print("...")
        tags = r.get('tags')
        if tags:
            if isinstance(tags, str):
                tags = [tags]
            print(f"Tags: {', '.join(str(tag) for tag in tags)}")
        print()


def main(argv=None):
    """Parse the command line, run both searches and print the results.

    Args:
        argv: Argument list to parse; None means use sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description="Hybrid memory search")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--file-limit", type=int, default=3, help="Max file results")
    parser.add_argument("--vector-limit", type=int, default=3, help="Max vector results")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args(argv)

    # Progress goes to stderr so stdout carries only the results.
    print(f"Searching for: '{args.query}'\n", file=sys.stderr)

    # Search both sources and combine, file results first.
    file_results = search_files(args.query, args.file_limit)
    vector_results = search_qdrant(args.query, args.vector_limit)
    all_results = file_results + vector_results

    if args.json:
        # Always emit valid JSON, including "[]" when nothing matched, so
        # programmatic consumers never have to parse a prose message.
        print(json.dumps(all_results, indent=2))
        return

    if not all_results:
        print("No memories found matching your query.")
        return

    _print_file_results(file_results)
    _print_vector_results(vector_results)


if __name__ == "__main__":
    main()