jarvis-memory/skills/qdrant-memory/scripts/qd.py

#!/usr/bin/env python3
"""
Qdrant_Documents - Complete management script
Usage: qd.py <command> [options]
Commands:
list - List collection info and stats
search - Search documents
store - Store new document
delete - Delete document by ID
export - Export all documents to JSON
import - Import documents from JSON
count - Get total document count
tags - List unique tags
"""
import argparse
import json
import sys
import urllib.request
import uuid
from datetime import datetime
QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION = "Qdrant_Documents"
OLLAMA_URL = "http://localhost:11434/v1"
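
# NOTE: these endpoints are deployment-specific. QDRANT_URL points at a Qdrant
# REST API on the LAN, COLLECTION is the target collection, and OLLAMA_URL is
# Ollama's OpenAI-compatible base path (/v1). The default embedding model
# below (nomic-embed-text) is assumed to produce vectors whose dimensionality
# matches this collection's configured vector size; swapping the model would
# require recreating the collection to match.
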
# ============================================================================
# UTILITIES
# ============================================================================
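# get_embedding() posts to Ollama's OpenAI-compatible embeddings endpoint.
# A sketch of the expected response shape (assuming the /v1 compatibility API):
#
#   {"data": [{"embedding": [0.01, -0.02, ...], "index": 0}], "model": "..."}
#
# Only data[0].embedding is used; input text is truncated to 8000 characters
# before embedding.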
def get_embedding(text, model="nomic-embed-text"):
    """Generate embedding using Ollama"""
    data = json.dumps({"model": model, "input": text[:8000]}).encode()
    req = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=data,
        headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            return json.loads(r.read().decode())["data"][0]["embedding"]
    except Exception as e:
        print(f"Embedding error: {e}", file=sys.stderr)
        return None

def make_request(url, data=None, method="GET"):
    """Make HTTP request"""
    req = urllib.request.Request(url, method=method)
    if data:
        req.data = json.dumps(data).encode()
        req.add_header("Content-Type", "application/json")
    return req

def check_collection():
    """Verify collection exists"""
    try:
        req = make_request(f"{QDRANT_URL}/collections/{COLLECTION}")
        with urllib.request.urlopen(req, timeout=5) as r:
            return r.read()
    except Exception:
        return None

# ============================================================================
# COMMANDS
# ============================================================================
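# cmd_list reads GET /collections/<name> and prints the fields Qdrant returns
# under "result" (status, points_count, config.params.vectors, etc.). Exact
# field availability can vary slightly between Qdrant versions, so treat the
# schema printout as best-effort.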
def cmd_list(args):
    """List collection info"""
    data = check_collection()
    if not data:
        print(f"❌ Collection '{COLLECTION}' not found")
        sys.exit(1)
    info = json.loads(data.decode())["result"]
    print(f"\n📚 Collection: {COLLECTION}")
    print(f" Status: {info['status']}")
    print(f" Points: {info['points_count']:,}")
    print(f" Vectors: {info['indexed_vectors_count']:,}")
    print(f" Segments: {info['segments_count']}")
    print(f" Vector size: {info['config']['params']['vectors']['size']}")
    print(f" Distance: {info['config']['params']['vectors']['distance']}")
    print(f" Optimizer: {info['optimizer_status']}")
    print()
    # Show payload schema
    print("📋 Payload Schema:")
    for field, schema in info.get("payload_schema", {}).items():
        if isinstance(schema, dict) and "data_type" in schema:
            print(f" - {field}: {schema['data_type']} ({schema.get('points', 0):,} points)")
    print()

def cmd_count(args):
    """Get document count"""
    req = make_request(f"{QDRANT_URL}/collections/{COLLECTION}")
    with urllib.request.urlopen(req, timeout=5) as r:
        count = json.loads(r.read().decode())["result"]["points_count"]
    print(f"{count}")

def cmd_search(args):
    """Search documents"""
    embedding = get_embedding(args.query)
    if not embedding:
        print("❌ Failed to generate embedding")
        sys.exit(1)
    search_body = {
        "vector": embedding,
        "limit": args.limit,
        "with_payload": True,
        "with_vector": False
    }
    if args.tag:
        search_body["filter"] = {"must": [{"key": "tag", "match": {"value": args.tag}}]}
    data = json.dumps(search_body).encode()
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
        data=data,
        headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            results = json.loads(r.read().decode())["result"]
    except Exception as e:
        print(f"❌ Search failed: {e}")
        sys.exit(1)
    if not results:
        print("No results found")
        return
    print(f"Found {len(results)} results:\n")
    for i, r in enumerate(results, 1):
        p = r.get("payload", {})
        print(f"[{i}] Score: {r['score']:.3f}")
        print(f" Tags: {p.get('tag', 'none')}")
        text = p.get('text', '')[:args.chars]
        if len(p.get('text', '')) > args.chars:
            text += "..."
        print(f" Text: {text}")
        print()

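# cmd_store upserts a single point via PUT /points?wait=true with a fresh UUID
# and a payload of: text, tag (list), sections (list), date, created_at.
# Example invocation (illustrative file and tag names):
#
#   qd.py store --file README.md --tag "docs,qdrant" --section "setup"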
def cmd_store(args):
    """Store a document"""
    # Read from file or use text argument
    if args.file:
        with open(args.file, 'r') as f:
            text = f.read()
    else:
        text = args.text
    if not text:
        print("❌ No text to store")
        sys.exit(1)
    embedding = get_embedding(text)
    if not embedding:
        print("❌ Failed to generate embedding")
        sys.exit(1)
    # Parse tags
    tags = args.tag.split(",") if args.tag else []
    sections = args.section.split(",") if args.section else []
    point = {
        "points": [{
            "id": str(uuid.uuid4()),
            "vector": embedding,
            "payload": {
                "text": text,
                "tag": tags,
                "sections": sections,
                "date": datetime.now().strftime("%Y-%m-%d"),
                "created_at": datetime.now().isoformat()
            }
        }]
    }
    data = json.dumps(point).encode()
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points?wait=true",
        data=data,
        headers={"Content-Type": "application/json"},
        method="PUT"
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            result = json.loads(r.read().decode())
            if result.get("status") == "ok":
                print(f"✅ Stored document ({len(text)} chars, {len(embedding)}D vector)")
            else:
                print(f"❌ Store failed: {result}")
                sys.exit(1)
    except Exception as e:
        print(f"❌ Store error: {e}")
        sys.exit(1)

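# cmd_delete removes one point by ID. IDs are the UUIDs shown by `qd.py export`
# (or assigned at store time), e.g. qd.py delete 3f2b9c1e-... (placeholder ID).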
def cmd_delete(args):
    """Delete a document by ID"""
    # Qdrant deletes points via POST /points/delete with a list of IDs
    req = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/delete?wait=true",
        data={"points": [args.id]},
        method="POST"
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            print(f"✅ Deleted point {args.id}")
    except Exception as e:
        print(f"❌ Delete error: {e}")
        sys.exit(1)

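# cmd_export pages through the collection with POST /points/scroll, following
# next_page_offset until exhausted. Vectors are deliberately skipped
# (with_vector: false), so an export contains only IDs and payloads.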
def cmd_export(args):
    """Export all documents to JSON"""
    print(f"Exporting {COLLECTION}...", file=sys.stderr)
    # Get all points
    all_points = []
    offset = None
    while True:
        scroll_body = {"limit": 100, "with_payload": True, "with_vector": False}
        if offset:
            scroll_body["offset"] = offset
        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION}/points/scroll",
            data=json.dumps(scroll_body).encode(),
            headers={"Content-Type": "application/json"}
        )
        try:
            with urllib.request.urlopen(req, timeout=30) as r:
                result = json.loads(r.read().decode())
                points = result.get("result", {}).get("points", [])
                if not points:
                    break
                all_points.extend(points)
                offset = result.get("result", {}).get("next_page_offset")
                if not offset:
                    break
        except Exception as e:
            print(f"❌ Export error: {e}")
            sys.exit(1)
    # Format output
    output = []
    for p in all_points:
        output.append({
            "id": p["id"],
            "payload": p.get("payload", {})
        })
    if args.output:
        with open(args.output, 'w') as f:
            json.dump(output, f, indent=2)
        print(f"✅ Exported {len(output)} documents to {args.output}")
    else:
        print(json.dumps(output, indent=2))

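# cmd_import re-embeds each document's payload text rather than restoring
# stored vectors (exports do not include them), so importing a large backup
# costs one Ollama embedding call per document.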
def cmd_import(args):
    """Import documents from JSON"""
    with open(args.file, 'r') as f:
        documents = json.load(f)
    print(f"Importing {len(documents)} documents...")
    success = 0
    for doc in documents:
        text = doc.get("payload", {}).get("text", "")
        if not text:
            continue
        embedding = get_embedding(text)
        if not embedding:
            print(f" ⚠️ Skipping {doc.get('id')}: embedding failed")
            continue
        point = {
            "points": [{
                "id": doc.get("id", str(uuid.uuid4())),
                "vector": embedding,
                "payload": doc.get("payload", {})
            }]
        }
        data = json.dumps(point).encode()
        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION}/points?wait=true",
            data=data,
            headers={"Content-Type": "application/json"},
            method="PUT"
        )
        try:
            with urllib.request.urlopen(req, timeout=30) as r:
                if json.loads(r.read().decode()).get("status") == "ok":
                    success += 1
        except Exception as e:
            print(f" ⚠️ Upsert failed for {doc.get('id')}: {e}", file=sys.stderr)
    print(f"✅ Imported {success}/{len(documents)} documents")

def cmd_tags(args):
    """List unique tags"""
    # Use scroll to get all tags
    all_tags = set()
    offset = None
    while True:
        scroll_body = {"limit": 100, "with_payload": True, "with_vector": False}
        if offset:
            scroll_body["offset"] = offset
        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION}/points/scroll",
            data=json.dumps(scroll_body).encode(),
            headers={"Content-Type": "application/json"}
        )
        try:
            with urllib.request.urlopen(req, timeout=30) as r:
                result = json.loads(r.read().decode())
                points = result.get("result", {}).get("points", [])
                if not points:
                    break
                for p in points:
                    tags = p.get("payload", {}).get("tag", [])
                    if isinstance(tags, list):
                        all_tags.update(tags)
                    elif tags:
                        all_tags.add(tags)
                offset = result.get("result", {}).get("next_page_offset")
                if not offset:
                    break
        except Exception as e:
            print(f"❌ Error: {e}")
            sys.exit(1)
    print(f"\n🏷️ Unique tags ({len(all_tags)}):")
    for tag in sorted(all_tags):
        print(f" - {tag}")
    print()

# ============================================================================
# MAIN
# ============================================================================
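# Each subcommand maps one-to-one onto a cmd_* handler above; argparse only
# parses flags, while all I/O and error handling live in the handlers.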
def main():
    parser = argparse.ArgumentParser(
        description=f"Qdrant_Documents management ({COLLECTION})",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  qd.py list                              # Show collection stats
  qd.py search "docker volumes"           # Search documents
  qd.py search "query" --tag kubernetes   # Filter by tag
  qd.py store "text here" --tag "docker"  # Store document
  qd.py store --file README.md --tag "doc"
  qd.py export --output backup.json       # Export all
  qd.py tags                              # List all tags
"""
    )
    subparsers = parser.add_subparsers(dest="cmd", required=True)
    # list
    subparsers.add_parser("list", help="Show collection info")
    # count
    subparsers.add_parser("count", help="Get document count")
    # search
    p_search = subparsers.add_parser("search", help="Search documents")
    p_search.add_argument("query", help="Search query")
    p_search.add_argument("--tag", help="Filter by tag")
    p_search.add_argument("--limit", type=int, default=5)
    p_search.add_argument("--chars", type=int, default=200)
    # store
    p_store = subparsers.add_parser("store", help="Store document")
    p_store.add_argument("text", nargs="?", help="Text to store")
    p_store.add_argument("--file", help="Read from file")
    p_store.add_argument("--tag", help="Comma-separated tags")
    p_store.add_argument("--section", help="Comma-separated sections", default="")
    # delete
    p_delete = subparsers.add_parser("delete", help="Delete by ID")
    p_delete.add_argument("id", help="Point ID to delete")
    # export
    p_export = subparsers.add_parser("export", help="Export to JSON")
    p_export.add_argument("--output", "-o", help="Output file")
    # import
    p_import = subparsers.add_parser("import", help="Import from JSON")
    p_import.add_argument("file", help="JSON file to import")
    # tags
    subparsers.add_parser("tags", help="List unique tags")
    args = parser.parse_args()
    # Run command
    if args.cmd == "list":
        cmd_list(args)
    elif args.cmd == "count":
        cmd_count(args)
    elif args.cmd == "search":
        cmd_search(args)
    elif args.cmd == "store":
        cmd_store(args)
    elif args.cmd == "delete":
        cmd_delete(args)
    elif args.cmd == "export":
        cmd_export(args)
    elif args.cmd == "import":
        cmd_import(args)
    elif args.cmd == "tags":
        cmd_tags(args)


if __name__ == "__main__":
    main()