#!/usr/bin/env python3
"""
Hybrid search: Search both file-based memory and Qdrant vectors

Usage: hybrid_search.py "Query text" [--file-limit 3] [--vector-limit 3] [--json]

File results come from keyword matching over the dated markdown notes
(``YYYY-MM-DD.md``) in MEMORY_DIR. Vector results come from the
qdrant-memory skill's ``search_memories.py`` script, run as a subprocess.
"""

import argparse
import json
import os
import subprocess
import sys
import re
from datetime import datetime, timedelta

WORKSPACE = "/root/.openclaw/workspace"
MEMORY_DIR = f"{WORKSPACE}/memory"
QDRANT_SEARCH_SCRIPT = f"{WORKSPACE}/skills/qdrant-memory/scripts/search_memories.py"

LOOKBACK_DAYS = 30   # how far back to look for dated memory files
MAX_FILES = 7        # scan at most this many of the newest existing files
CONTEXT_LINES = 3    # lines of context kept before and after a matching line
PREVIEW_CHARS = 300  # characters of each result shown in the text report


def _recent_memory_files(memory_dir):
    """Return (date_str, filepath) for the newest existing memory files, newest first."""
    today = datetime.now()
    found = []
    for offset in range(LOOKBACK_DAYS):
        date_str = (today - timedelta(days=offset)).strftime("%Y-%m-%d")
        filepath = f"{memory_dir}/{date_str}.md"
        if os.path.isfile(filepath):
            found.append((date_str, filepath))
            if len(found) == MAX_FILES:
                break
    return found


def _match_lines(lines, keywords, date_str, filepath):
    """Build one result per distinct context snippet around keyword-matching lines."""
    by_text = {}
    for index, line in enumerate(lines):
        line_lower = line.lower()
        hits = sum(1 for kw in keywords if kw in line_lower)
        if not hits:
            continue

        start = max(0, index - CONTEXT_LINES)
        end = min(len(lines), index + CONTEXT_LINES + 1)
        text = "\n".join(lines[start:end]).strip()
        score = hits / len(keywords)

        # Neighbouring matches near the file edges yield the very same
        # snippet; keep a single entry carrying the best score seen for it.
        existing = by_text.get(text)
        if existing is None:
            by_text[text] = {
                "source": f"file:{filepath}",
                "date": date_str,
                "score": score,
                "text": text,
                "type": "file",
            }
        elif score > existing["score"]:
            existing["score"] = score
    return list(by_text.values())


def search_files(query, limit=3, memory_dir=None):
    """Search recent memory files for keyword matches.

    The query is lower-cased and split on whitespace into keywords. Every
    line containing at least one keyword (substring match) produces a result
    holding that line plus CONTEXT_LINES lines either side. The score is the
    fraction of keywords present on the matching line.

    Args:
        query: Free-text query.
        limit: Maximum number of results to return.
        memory_dir: Directory holding ``YYYY-MM-DD.md`` files; defaults to
            MEMORY_DIR.

    Returns:
        Up to ``limit`` dicts with keys ``source``, ``date``, ``score``,
        ``text`` and ``type`` ("file"), best score first. Ties keep scan
        order (newest file first, then top of file first). Returns ``[]``
        for a blank query or a non-positive limit.
    """
    keywords = set(query.lower().split())
    if limit <= 0 or not keywords:
        return []
    if memory_dir is None:
        memory_dir = MEMORY_DIR

    results = []
    for date_str, filepath in _recent_memory_files(memory_dir):
        try:
            # errors="replace": one stray byte should not hide a whole day's notes.
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                lines = f.read().split("\n")
        except OSError as e:
            print(f"Skipping unreadable memory file {filepath}: {e}", file=sys.stderr)
            continue
        results.extend(_match_lines(lines, keywords, date_str, filepath))

    # Rank every candidate before truncating, so a strong match in an older
    # file is not crowded out by weak matches in the newest one.
    results.sort(key=lambda r: r["score"], reverse=True)
    return results[:limit]


def _warn_qdrant(detail):
    """Report a Qdrant search failure on stderr; the caller falls back to file results."""
    print(f"Qdrant search failed (falling back to files only): {detail}", file=sys.stderr)


def search_qdrant(query, limit=3, script_path=None):
    """Search Qdrant using the search_memories script.

    Runs ``python3 <script_path> <query> --limit <limit> --json`` with a
    60-second timeout and parses its stdout as a JSON list of memory dicts.
    Each dict is tagged in place with ``type="vector"`` and
    ``source="qdrant"``.

    Args:
        query: Free-text query passed to the script.
        limit: Maximum number of results requested from the script.
        script_path: Path of the search script; defaults to
            QDRANT_SEARCH_SCRIPT.

    Returns:
        The list of tagged memory dicts, or ``[]`` on any failure (script
        missing, non-zero exit, timeout, invalid or unexpected JSON). This
        function is best-effort and never raises for those conditions; the
        reason is written to stderr.
    """
    if script_path is None:
        script_path = QDRANT_SEARCH_SCRIPT

    try:
        result = subprocess.run(
            ["python3", script_path, query, "--limit", str(limit), "--json"],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        _warn_qdrant(e)
        return []

    if result.returncode != 0:
        _warn_qdrant(result.stderr.strip() or f"exit code {result.returncode}")
        return []

    try:
        memories = json.loads(result.stdout)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        _warn_qdrant(f"invalid JSON output: {e}")
        return []

    if not isinstance(memories, list):
        _warn_qdrant(f"expected a JSON list, got {type(memories).__name__}")
        return []

    tagged = []
    for memory in memories:
        if not isinstance(memory, dict):
            continue  # ignore malformed entries rather than dropping the batch
        memory["type"] = "vector"
        memory["source"] = "qdrant"
        tagged.append(memory)
    return tagged


def _print_file_results(file_results):
    """Print the human-readable section for file-based results."""
    print(f"📁 File-based results ({len(file_results)}):")
    print("-" * 50)
    for r in file_results:
        print(f"[{r['date']}] Score: {r['score']:.2f}")
        print(r['text'][:PREVIEW_CHARS])
        if len(r['text']) > PREVIEW_CHARS:
            print("...")
        print()


def _print_vector_results(vector_results):
    """Print the human-readable section for Qdrant results."""
    print(f"\n🔍 Vector (Qdrant) results ({len(vector_results)}):")
    print("-" * 50)
    for r in vector_results:
        print(f"[{r.get('date', 'unknown')}] Score: {r.get('score', 0):.3f} [{r.get('importance', 'medium')}]")
        text = r.get('text', '')
        print(text[:PREVIEW_CHARS])
        if len(text) > PREVIEW_CHARS:
            print("...")
        if r.get('tags'):
            print(f"Tags: {', '.join(map(str, r['tags']))}")
        print()


def main(argv=None):
    """Parse command-line arguments, run both searches and print the results.

    Args:
        argv: Argument list excluding the program name; ``None`` means
            ``sys.argv[1:]``.

    Returns:
        Process exit code (always 0; argparse exits on its own for bad usage).
    """
    parser = argparse.ArgumentParser(description="Hybrid memory search")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--file-limit", type=int, default=3, help="Max file results")
    parser.add_argument("--vector-limit", type=int, default=3, help="Max vector results")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args(argv)

    # Progress goes to stderr so stdout stays clean for --json consumers.
    print(f"Searching for: '{args.query}'\n", file=sys.stderr)

    # Search both sources
    file_results = search_files(args.query, args.file_limit)
    vector_results = search_qdrant(args.query, args.vector_limit)

    # Combine results
    all_results = file_results + vector_results

    if args.json:
        # Always emit valid JSON, including "[]" when nothing matched.
        print(json.dumps(all_results, indent=2))
        return 0

    if not all_results:
        print("No memories found matching your query.")
        return 0

    _print_file_results(file_results)
    _print_vector_results(vector_results)
    return 0


if __name__ == "__main__":
    sys.exit(main())