Files
jarvis-memory/skills/qdrant-memory/scripts/kb_search.py

137 lines
3.8 KiB
Python
Raw Normal View History

2026-02-23 12:13:04 -06:00
#!/usr/bin/env python3
"""
Search kimi_kb (Knowledge Base) - Manual only
Usage:
python3 kb_search.py "query"
python3 kb_search.py "docker volumes" --domain "Docker"
python3 kb_search.py "query" --limit 10 --json
"""
import json
import sys
import urllib.request
from pathlib import Path
# Base URL of the Qdrant REST API that search_kb() sends its search request to.
QDRANT_URL = "http://10.0.0.40:6333"
# Name of the Qdrant collection queried by this script.
COLLECTION = "kimi_kb"
# Base URL of the embeddings API; get_embedding() posts to "{OLLAMA_URL}/embeddings".
OLLAMA_URL = "http://localhost:11434/v1"
def get_embedding(text):
    """Return the snowflake-arctic-embed2 embedding for ``text``.

    Only the first 8192 characters of ``text`` are sent. The request is
    posted to ``{OLLAMA_URL}/embeddings`` with a 60 second timeout.

    Returns:
        The value found at ``["data"][0]["embedding"]`` in the JSON reply,
        or ``None`` if the request or the parsing of the reply fails (the
        error is reported on stderr).
    """
    request_payload = {
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],
    }
    body = json.dumps(request_payload).encode()
    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=body,
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=60) as reply:
            parsed = json.loads(reply.read().decode())
        return parsed["data"][0]["embedding"]
    except Exception as exc:
        # Best-effort: callers treat None as "embedding unavailable".
        print(f"Error generating embedding: {exc}", file=sys.stderr)
        return None
def search_kb(query, domain=None, limit=5):
    """Run a vector search for ``query`` against the knowledge-base collection.

    Args:
        query: Text to embed and search for.
        domain: When truthy, restrict hits to points whose ``domain``
            payload field matches this value.
        limit: Maximum number of hits requested.

    Returns:
        The ``"result"`` list from the search response (an empty list if
        that key is absent), or ``None`` when the embedding could not be
        generated or the search request failed (the latter is reported on
        stderr).
    """
    query_vector = get_embedding(query)
    if query_vector is None:
        return None

    request_payload = {
        "vector": query_vector,
        "limit": limit,
        "with_payload": True,
        "with_vector": False,
    }
    # The filter is only attached when a domain was actually requested.
    if domain:
        request_payload["filter"] = {
            "must": [{"key": "domain", "match": {"value": domain}}],
        }

    body = json.dumps(request_payload).encode()
    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
        data=body,
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as reply:
            parsed = json.loads(reply.read().decode())
        return parsed.get("result", [])
    except Exception as exc:
        # Best-effort: callers treat None as "search failed".
        print(f"Error searching KB: {exc}", file=sys.stderr)
        return None
def format_result(point, idx):
    """Format one search hit as a human-readable, multi-line string.

    Args:
        point: One entry of the search result list: a dict that may carry
            ``"payload"`` (a dict, or ``None``) and ``"score"``.
        idx: Number shown in square brackets in front of the title.

    Returns:
        A string that starts with a newline and contains the title/score
        line, the domain line, optional URL and Source lines, and a content
        preview cut to 300 characters (``"..."`` is appended when the text
        was longer).
    """
    # ``or`` instead of a .get() default: a key that is present but null
    # bypasses the default and would otherwise crash the lookups below.
    payload = point.get("payload") or {}
    score = point.get("score") or 0
    text = payload.get("text") or ""

    parts = [
        f"\n[{idx}] {payload.get('title', 'Untitled')} (score: {score:.3f})\n",
        f" Domain: {payload.get('domain', 'unknown')}\n",
    ]
    if payload.get("url"):
        parts.append(f" URL: {payload['url']}\n")
    if payload.get("source"):
        parts.append(f" Source: {payload['source']}\n")

    preview = text[:300]
    if len(text) > 300:
        preview += "..."
    parts.append(f" Content: {preview}\n")
    return "".join(parts)
def main():
    """Command-line entry point: parse arguments, run the search, print hits.

    Arguments: a positional ``query`` plus the optional ``--domain``,
    ``--limit`` (default 5) and ``--json`` flags.

    Output:
        * Default mode prints a banner, a result count and one formatted
          entry per hit to stdout.
        * With ``--json`` stdout carries only the JSON document (``[]`` when
          nothing matched); the banner is written to stderr instead so the
          output can be piped into a JSON parser.

    Exits with status 1 when the search fails.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Search kimi_kb")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--domain", default=None, help="Filter by domain")
    parser.add_argument("--limit", type=int, default=5, help="Number of results")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    # Keep stdout machine-readable in JSON mode: progress text goes to stderr.
    banner_stream = sys.stderr if args.json else sys.stdout
    print(f"🔍 Searching kimi_kb: {args.query}", file=banner_stream)
    if args.domain:
        print(f" Filter: domain={args.domain}", file=banner_stream)
    print(file=banner_stream)

    results = search_kb(args.query, args.domain, args.limit)
    if results is None:
        print("❌ Search failed", file=sys.stderr)
        sys.exit(1)

    if args.json:
        # An empty result list is still valid JSON output ("[]").
        print(json.dumps(results, indent=2))
        return

    if not results:
        print("No results found in kimi_kb")
        return

    print(f"Found {len(results)} results:\n")
    for i, point in enumerate(results, 1):
        print(format_result(point, i))


if __name__ == "__main__":
    main()