Initial commit: Jarvis Memory system

skills/qdrant-memory/scripts/qd.py (new executable file, +427 lines)
@@ -0,0 +1,427 @@
#!/usr/bin/env python3
"""
Qdrant_Documents - Complete management script
Usage: qd.py <command> [options]

Commands:
  list    - List collection info and stats
  search  - Search documents
  store   - Store new document
  delete  - Delete document by ID
  export  - Export all documents to JSON
  import  - Import documents from JSON
  count   - Get total document count
  tags    - List unique tags
"""

import argparse
import json
import sys
import urllib.request
import uuid
from datetime import datetime

QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION = "Qdrant_Documents"
OLLAMA_URL = "http://localhost:11434/v1"
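
# NOTE: both endpoints are deployment-specific (a LAN Qdrant node and a local
# Ollama instance here); adjust them for your environment. OLLAMA_URL points
# at Ollama's OpenAI-compatible API (the /v1 prefix), so the /embeddings
# request below resolves to /v1/embeddings.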

# ============================================================================
# UTILITIES
# ============================================================================

def get_embedding(text, model="nomic-embed-text"):
    """Generate embedding using Ollama"""
    # Truncate input to 8000 chars to stay within the embedding model's context
    data = json.dumps({"model": model, "input": text[:8000]}).encode()
    req = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=data,
        headers={"Content-Type": "application/json"}
    )
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            return json.loads(r.read().decode())["data"][0]["embedding"]
    except Exception as e:
        print(f"Embedding error: {e}", file=sys.stderr)
        return None
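
# The OpenAI-compatible embeddings response has roughly this shape (a sketch):
#   {"data": [{"embedding": [0.012, ...], "index": 0}], "model": "...", ...}
# nomic-embed-text produces 768-dimensional vectors, so the Qdrant collection
# is assumed to be configured with size=768.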

def make_request(url, data=None, method="GET"):
    """Make HTTP request"""
    req = urllib.request.Request(url, method=method)
    if data:
        req.data = json.dumps(data).encode()
        req.add_header("Content-Type", "application/json")
    return req

def check_collection():
    """Verify collection exists; return the raw response body, or None"""
    try:
        req = make_request(f"{QDRANT_URL}/collections/{COLLECTION}")
        with urllib.request.urlopen(req, timeout=5) as r:
            return r.read()
    except Exception:
        return None
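
# The collection is assumed to exist already; it can be created with the
# standard Qdrant REST call, e.g. (a sketch; size must match the embedder):
#   curl -X PUT "$QDRANT_URL/collections/Qdrant_Documents" \
#     -H 'Content-Type: application/json' \
#     -d '{"vectors": {"size": 768, "distance": "Cosine"}}'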

# ============================================================================
# COMMANDS
# ============================================================================

def cmd_list(args):
    """List collection info"""
    data = check_collection()
    if not data:
        print(f"❌ Collection '{COLLECTION}' not found")
        sys.exit(1)

    info = json.loads(data.decode())["result"]

    print(f"\n📚 Collection: {COLLECTION}")
    print(f"   Status: {info['status']}")
    print(f"   Points: {info['points_count']:,}")
    print(f"   Vectors: {info['indexed_vectors_count']:,}")
    print(f"   Segments: {info['segments_count']}")
    print(f"   Vector size: {info['config']['params']['vectors']['size']}")
    print(f"   Distance: {info['config']['params']['vectors']['distance']}")
    print(f"   Optimizer: {info['optimizer_status']}")
    print()

    # Show payload schema
    print("📋 Payload Schema:")
    for field, schema in info.get("payload_schema", {}).items():
        if isinstance(schema, dict) and "data_type" in schema:
            print(f"   - {field}: {schema['data_type']} ({schema.get('points', 0):,} points)")
    print()

def cmd_count(args):
    """Get document count"""
    req = make_request(f"{QDRANT_URL}/collections/{COLLECTION}")
    with urllib.request.urlopen(req, timeout=5) as r:
        count = json.loads(r.read().decode())["result"]["points_count"]
    print(f"{count}")

def cmd_search(args):
    """Search documents"""
    embedding = get_embedding(args.query)
    if not embedding:
        print("❌ Failed to generate embedding")
        sys.exit(1)

    search_body = {
        "vector": embedding,
        "limit": args.limit,
        "with_payload": True,
        "with_vector": False
    }

    # A match filter on the list-valued "tag" field matches points whose
    # tag array contains the value
    if args.tag:
        search_body["filter"] = {"must": [{"key": "tag", "match": {"value": args.tag}}]}

    data = json.dumps(search_body).encode()
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
        data=data,
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            results = json.loads(r.read().decode())["result"]
    except Exception as e:
        print(f"❌ Search failed: {e}")
        sys.exit(1)

    if not results:
        print("No results found")
        return

    print(f"Found {len(results)} results:\n")
    for i, r in enumerate(results, 1):
        p = r.get("payload", {})
        print(f"[{i}] Score: {r['score']:.3f}")
        print(f"    Tags: {p.get('tag', 'none')}")
        text = p.get('text', '')[:args.chars]
        if len(p.get('text', '')) > args.chars:
            text += "..."
        print(f"    Text: {text}")
        print()
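
# With Cosine distance (the assumed collection config), Qdrant's score is a
# similarity — higher means closer, roughly in the [-1, 1] range.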

def cmd_store(args):
    """Store a document"""
    # Read from file or use text argument
    if args.file:
        with open(args.file, 'r') as f:
            text = f.read()
    else:
        text = args.text

    if not text:
        print("❌ No text to store")
        sys.exit(1)

    embedding = get_embedding(text)
    if not embedding:
        print("❌ Failed to generate embedding")
        sys.exit(1)

    # Parse tags
    tags = args.tag.split(",") if args.tag else []
    sections = args.section.split(",") if args.section else []

    point = {
        "points": [{
            "id": str(uuid.uuid4()),
            "vector": embedding,
            "payload": {
                "text": text,
                "tag": tags,
                "sections": sections,
                "date": datetime.now().strftime("%Y-%m-%d"),
                "created_at": datetime.now().isoformat()
            }
        }]
    }

    # Upsert; ?wait=true blocks until the write is applied
    data = json.dumps(point).encode()
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points?wait=true",
        data=data,
        headers={"Content-Type": "application/json"},
        method="PUT"
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            result = json.loads(r.read().decode())
            if result.get("status") == "ok":
                print(f"✅ Stored document ({len(text)} chars, {len(embedding)}D vector)")
            else:
                print(f"❌ Store failed: {result}")
                sys.exit(1)
    except Exception as e:
        print(f"❌ Store error: {e}")
        sys.exit(1)
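
# A stored point's payload looks like (illustrative sketch, not real data):
#   {"text": "...", "tag": ["docker"], "sections": [],
#    "date": "2024-01-01", "created_at": "2024-01-01T12:00:00"}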

def cmd_delete(args):
    """Delete a document by ID"""
    # Qdrant deletes points via POST /points/delete with a list of IDs
    # (there is no per-point DELETE route)
    req = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/delete?wait=true",
        data={"points": [args.id]},
        method="POST"
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as r:
            print(f"✅ Deleted point {args.id}")
    except Exception as e:
        print(f"❌ Delete error: {e}")
        sys.exit(1)

def cmd_export(args):
    """Export all documents to JSON"""
    print(f"Exporting {COLLECTION}...", file=sys.stderr)

    # Page through all points with the scroll API; next_page_offset is None
    # on the last page
    all_points = []
    offset = None

    while True:
        scroll_body = {"limit": 100, "with_payload": True, "with_vector": False}
        if offset:
            scroll_body["offset"] = offset

        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION}/points/scroll",
            data=json.dumps(scroll_body).encode(),
            headers={"Content-Type": "application/json"}
        )

        try:
            with urllib.request.urlopen(req, timeout=30) as r:
                result = json.loads(r.read().decode())
                points = result.get("result", {}).get("points", [])
                if not points:
                    break
                all_points.extend(points)
                offset = result.get("result", {}).get("next_page_offset")
                if not offset:
                    break
        except Exception as e:
            print(f"❌ Export error: {e}")
            sys.exit(1)

    # Format output (vectors are omitted; import re-embeds the text)
    output = []
    for p in all_points:
        output.append({
            "id": p["id"],
            "payload": p.get("payload", {})
        })

    if args.output:
        with open(args.output, 'w') as f:
            json.dump(output, f, indent=2)
        print(f"✅ Exported {len(output)} documents to {args.output}")
    else:
        print(json.dumps(output, indent=2))
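
# Export format (also what `import` expects), sketched:
#   [{"id": "<uuid>", "payload": {"text": "...", "tag": [...], ...}}, ...]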

def cmd_import(args):
    """Import documents from JSON"""
    with open(args.file, 'r') as f:
        documents = json.load(f)

    print(f"Importing {len(documents)} documents...")

    success = 0
    for doc in documents:
        text = doc.get("payload", {}).get("text", "")
        if not text:
            continue

        embedding = get_embedding(text)
        if not embedding:
            print(f"  ⚠️ Skipping {doc.get('id')}: embedding failed")
            continue

        point = {
            "points": [{
                "id": doc.get("id", str(uuid.uuid4())),
                "vector": embedding,
                "payload": doc.get("payload", {})
            }]
        }

        data = json.dumps(point).encode()
        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION}/points?wait=true",
            data=data,
            headers={"Content-Type": "application/json"},
            method="PUT"
        )

        try:
            with urllib.request.urlopen(req, timeout=30) as r:
                if json.loads(r.read().decode()).get("status") == "ok":
                    success += 1
        except Exception:
            pass  # count only successful upserts; failures show in the summary

    print(f"✅ Imported {success}/{len(documents)} documents")

def cmd_tags(args):
    """List unique tags"""
    # Use scroll to get all tags
    all_tags = set()
    offset = None

    while True:
        scroll_body = {"limit": 100, "with_payload": True, "with_vector": False}
        if offset:
            scroll_body["offset"] = offset

        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION}/points/scroll",
            data=json.dumps(scroll_body).encode(),
            headers={"Content-Type": "application/json"}
        )

        try:
            with urllib.request.urlopen(req, timeout=30) as r:
                result = json.loads(r.read().decode())
                points = result.get("result", {}).get("points", [])
                if not points:
                    break
                for p in points:
                    tags = p.get("payload", {}).get("tag", [])
                    if isinstance(tags, list):
                        all_tags.update(tags)
                    elif tags:
                        all_tags.add(tags)
                offset = result.get("result", {}).get("next_page_offset")
                if not offset:
                    break
        except Exception as e:
            print(f"❌ Error: {e}")
            sys.exit(1)

    print(f"\n🏷️ Unique tags ({len(all_tags)}):")
    for tag in sorted(all_tags):
        print(f"  - {tag}")
    print()
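
# Tags are collected client-side by scrolling every point — fine for a small
# personal collection, but O(N) in collection size.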

# ============================================================================
# MAIN
# ============================================================================

def main():
    parser = argparse.ArgumentParser(
        description=f"Qdrant_Documents management ({COLLECTION})",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  qd.py list                              # Show collection stats
  qd.py search "docker volumes"           # Search documents
  qd.py search "query" --tag kubernetes   # Filter by tag
  qd.py store "text here" --tag "docker"  # Store document
  qd.py store --file README.md --tag "doc"
  qd.py export --output backup.json       # Export all
  qd.py tags                              # List all tags
"""
    )

    subparsers = parser.add_subparsers(dest="cmd", required=True)

    # list
    subparsers.add_parser("list", help="Show collection info")

    # count
    subparsers.add_parser("count", help="Get document count")

    # search
    p_search = subparsers.add_parser("search", help="Search documents")
    p_search.add_argument("query", help="Search query")
    p_search.add_argument("--tag", help="Filter by tag")
    p_search.add_argument("--limit", type=int, default=5)
    p_search.add_argument("--chars", type=int, default=200)

    # store
    p_store = subparsers.add_parser("store", help="Store document")
    p_store.add_argument("text", nargs="?", help="Text to store")
    p_store.add_argument("--file", help="Read from file")
    p_store.add_argument("--tag", help="Comma-separated tags")
    p_store.add_argument("--section", help="Comma-separated sections", default="")

    # delete
    p_delete = subparsers.add_parser("delete", help="Delete by ID")
    p_delete.add_argument("id", help="Point ID to delete")

    # export
    p_export = subparsers.add_parser("export", help="Export to JSON")
    p_export.add_argument("--output", "-o", help="Output file")

    # import
    p_import = subparsers.add_parser("import", help="Import from JSON")
    p_import.add_argument("file", help="JSON file to import")

    # tags
    subparsers.add_parser("tags", help="List unique tags")

    args = parser.parse_args()

    # Run command
    if args.cmd == "list":
        cmd_list(args)
    elif args.cmd == "count":
        cmd_count(args)
    elif args.cmd == "search":
        cmd_search(args)
    elif args.cmd == "store":
        cmd_store(args)
    elif args.cmd == "delete":
        cmd_delete(args)
    elif args.cmd == "export":
        cmd_export(args)
    elif args.cmd == "import":
        cmd_import(args)
    elif args.cmd == "tags":
        cmd_tags(args)


if __name__ == "__main__":
    main()