136 lines
4.7 KiB
Python
Executable File
136 lines
4.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Hybrid search: search both file-based memory and Qdrant vectors.

Usage: hybrid_search.py "Query text" [--file-limit 3] [--vector-limit 3] [--json]
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
|
|
# Root directory under which the memory files and the skill scripts are looked up.
WORKSPACE = "/root/.openclaw/workspace"

# Directory containing the per-day ``YYYY-MM-DD.md`` memory files.
MEMORY_DIR = WORKSPACE + "/memory"
|
|
|
|
def search_files(query, limit=3):
    """Search recent daily memory files for lines containing query keywords.

    The query is lower-cased and split on whitespace into a set of keywords.
    The (up to) seven most recent ``YYYY-MM-DD.md`` files that exist in
    ``MEMORY_DIR`` within the last 30 days are scanned line by line; a line
    matches when it contains at least one keyword as a case-insensitive
    substring.

    Every matching line is ranked before the result list is truncated, so a
    strong match late in a file is not lost behind earlier weak ones. A match
    whose line is already visible inside the context window of a better (or
    earlier, on ties) result from the same file is dropped as a duplicate.

    Args:
        query: Free-text search query.
        limit: Maximum number of results to return.

    Returns:
        A list of at most ``limit`` dicts ordered by descending score. Each
        dict has the keys ``source`` (``"file:<path>"``), ``date``, ``score``
        (fraction of the keywords found on the matched line), ``text`` (the
        matched line plus up to three lines of context on either side) and
        ``type`` (always ``"file"``). Unreadable files are skipped with a
        warning on stderr.
    """
    keywords = set(query.lower().split())
    if not keywords or limit <= 0:
        return []

    # Walk backwards from today so the list is ordered most recent first.
    today = datetime.now()
    recent_files = []
    for days_back in range(30):
        date_str = (today - timedelta(days=days_back)).strftime("%Y-%m-%d")
        filepath = f"{MEMORY_DIR}/{date_str}.md"
        if os.path.exists(filepath):
            recent_files.append((date_str, filepath))

    # Each candidate: (score, filepath, date_str, line_index, start, end, lines)
    candidates = []
    for date_str, filepath in recent_files[:7]:  # 7 most recent existing files
        try:
            # Explicit encoding keeps results independent of the locale;
            # undecodable bytes are replaced instead of discarding the file.
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                lines = f.read().split("\n")
        except OSError as exc:
            print(f"Skipping unreadable memory file {filepath}: {exc}",
                  file=sys.stderr)
            continue

        for idx, line in enumerate(lines):
            line_lower = line.lower()
            hits = sum(1 for kw in keywords if kw in line_lower)
            if not hits:
                continue
            # Context window: 3 lines before and after the matched line.
            start = max(0, idx - 3)
            end = min(len(lines), idx + 4)
            candidates.append(
                (hits / len(keywords), filepath, date_str, idx, start, end, lines)
            )

    # Stable sort: ties keep "newest file first, earliest line first" order.
    candidates.sort(key=lambda c: c[0], reverse=True)

    results = []
    shown_windows = {}  # filepath -> list of (start, end) already returned
    for score, filepath, date_str, idx, start, end, lines in candidates:
        windows = shown_windows.setdefault(filepath, [])
        if any(lo <= idx < hi for lo, hi in windows):
            continue  # this line is already part of a returned snippet
        windows.append((start, end))
        results.append({
            "source": f"file:{filepath}",
            "date": date_str,
            "score": score,
            "text": "\n".join(lines[start:end]).strip(),
            "type": "file",
        })
        if len(results) >= limit:
            break

    return results
|
|
|
|
def search_qdrant(query, limit=3):
    """Search Qdrant by delegating to the ``search_memories.py`` helper script.

    Runs ``skills/qdrant-memory/scripts/search_memories.py`` under
    ``WORKSPACE`` with ``python3``, passing the query plus ``--limit`` and
    ``--json``, and parses the script's stdout as JSON.

    Args:
        query: Free-text search query forwarded to the helper script.
        limit: Maximum number of results requested from the helper script.

    Returns:
        The list of result dicts produced by the script, each tagged with
        ``type="vector"`` and ``source="qdrant"``. Any failure (script cannot
        be started, times out after 60 seconds, exits non-zero, or prints
        something other than a JSON list of objects) is reported on stderr
        and yields an empty list so the caller can fall back to file results.
    """
    failure_prefix = "Qdrant search failed (falling back to files only)"
    script_path = f"{WORKSPACE}/skills/qdrant-memory/scripts/search_memories.py"

    try:
        result = subprocess.run(
            ["python3", script_path, query, "--limit", str(limit), "--json"],
            capture_output=True, text=True, timeout=60,
        )
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: interpreter not found; SubprocessError: timeout;
        # ValueError: argument that cannot be passed to a process (e.g. NUL).
        print(f"{failure_prefix}: {e}", file=sys.stderr)
        return []

    if result.returncode != 0:
        # Surface the helper's own error text instead of failing silently.
        detail = result.stderr.strip() or f"exit code {result.returncode}"
        print(f"{failure_prefix}: {detail}", file=sys.stderr)
        return []

    try:
        memories = json.loads(result.stdout)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"{failure_prefix}: {e}", file=sys.stderr)
        return []

    if not isinstance(memories, list) or not all(isinstance(m, dict) for m in memories):
        print(f"{failure_prefix}: unexpected JSON payload", file=sys.stderr)
        return []

    for m in memories:
        m["type"] = "vector"
        m["source"] = "qdrant"
    return memories
|
|
|
|
def _format_text_report(file_results, vector_results):
    """Render file and vector results as the human-readable text report.

    Args:
        file_results: Result dicts with ``date``, ``score`` and ``text`` keys.
        vector_results: Result dicts; ``date``, ``score``, ``importance``,
            ``text`` and ``tags`` are all optional.

    Returns:
        The report as a single string without a trailing newline. Snippets
        longer than 300 characters are cut and followed by a ``...`` line.
    """
    out = [f"📁 File-based results ({len(file_results)}):", "-" * 50]
    for r in file_results:
        out.append(f"[{r['date']}] Score: {r['score']:.2f}")
        out.append(r['text'][:300])
        if len(r['text']) > 300:
            out.append("...")
        out.append("")

    out.append(f"\n🔍 Vector (Qdrant) results ({len(vector_results)}):")
    out.append("-" * 50)
    for r in vector_results:
        # ``or`` guards: a present-but-null field must not crash formatting.
        score = r.get('score') or 0
        text = r.get('text') or ''
        out.append(
            f"[{r.get('date', 'unknown')}] Score: {score:.3f} "
            f"[{r.get('importance', 'medium')}]"
        )
        out.append(text[:300])
        if len(text) > 300:
            out.append("...")
        if r.get('tags'):
            out.append(f"Tags: {', '.join(str(t) for t in r['tags'])}")
        out.append("")

    return "\n".join(out)


def main(argv=None):
    """Parse the command line, run both searches and print the results.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status (always 0; argparse exits on its own for usage
        errors).
    """
    parser = argparse.ArgumentParser(description="Hybrid memory search")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--file-limit", type=int, default=3, help="Max file results")
    parser.add_argument("--vector-limit", type=int, default=3, help="Max vector results")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args(argv)

    # Progress goes to stderr so stdout carries only the results.
    print(f"Searching for: '{args.query}'\n", file=sys.stderr)

    # Search both sources
    file_results = search_files(args.query, args.file_limit)
    vector_results = search_qdrant(args.query, args.vector_limit)
    all_results = file_results + vector_results

    if args.json:
        # stdout must always be valid JSON in this mode, so zero hits is "[]"
        # and the human-readable notice goes to stderr instead.
        if not all_results:
            print("No memories found matching your query.", file=sys.stderr)
        print(json.dumps(all_results, indent=2))
    elif not all_results:
        print("No memories found matching your query.")
    else:
        print(_format_text_report(file_results, vector_results))

    return 0


if __name__ == "__main__":
    sys.exit(main())
|