243 lines
8.5 KiB
Python
Executable File
243 lines
8.5 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Initialize Qdrant collections for Kimi Memory System

Creates 3 collections with snowflake-arctic-embed2 (1024 dims) using Qdrant 2025 best practices:

1. kimi_memories - Personal memories, preferences, lessons learned
2. kimi_kb - Knowledge base for web search, documents, scraped data
3. private_court_docs - Court documents and legal discussions

Features:
- on_disk=True for vectors (minimize RAM usage)
- on_disk_payload=True for payload
- Optimizer config for efficient indexing
- Binary quantization support (2025+ feature)

Usage: init_all_collections.py [--recreate]
"""

import argparse
import json
import sys

# Base URL of the Qdrant HTTP API (single-node deployment on the LAN).
QDRANT_URL = "http://10.0.0.40:6333"
# Collection configurations
# Maps each collection name to a human-readable description and the embedding
# dimensionality. All three use 1024 dims to match snowflake-arctic-embed2.
COLLECTIONS = {
    "kimi_memories": {
        "description": "Personal memories, preferences, lessons learned",
        "vector_size": 1024
    },
    "kimi_kb": {
        "description": "Knowledge base - web data, documents, reference materials",
        "vector_size": 1024
    },
    "private_court_docs": {
        "description": "Court documents and legal discussions",
        "vector_size": 1024
    }
}
def make_request(url, data=None, method="GET"):
    """Build a urllib Request for the Qdrant HTTP API.

    Args:
        url: Full URL to request.
        data: Optional JSON-serializable payload. When provided (including a
            falsy value such as an empty dict) it is attached as the request
            body with a JSON content type.
        method: HTTP method to use ("GET", "PUT", "DELETE", ...).

    Returns:
        A urllib.request.Request ready to be passed to urlopen().
    """
    import urllib.request
    req = urllib.request.Request(url, method=method)
    # `is not None` (not truthiness): the old `if data:` silently dropped an
    # empty-dict payload instead of sending `{}` as the request body.
    if data is not None:
        req.data = json.dumps(data).encode()
        req.add_header("Content-Type", "application/json")
    return req
def collection_exists(name):
    """Return True if the Qdrant collection `name` exists.

    A 404 from the API means "does not exist"; any other HTTP error is
    re-raised to the caller. Connection-level failures report False.
    """
    import urllib.error
    import urllib.request
    try:
        probe = make_request(f"{QDRANT_URL}/collections/{name}")
        with urllib.request.urlopen(probe, timeout=5):
            return True
    except urllib.error.HTTPError as err:
        if err.code != 404:
            raise
        return False
    except Exception:
        # Unreachable server, timeout, etc. — treat as "not there".
        return False
def get_collection_info(name):
    """Fetch and parse the collection-info response, or None on any failure."""
    import urllib.request
    try:
        request = make_request(f"{QDRANT_URL}/collections/{name}")
        with urllib.request.urlopen(request, timeout=5) as resp:
            return json.loads(resp.read().decode())
    except Exception:
        # Best-effort lookup: callers treat None as "info unavailable".
        return None
def create_collection(name, vector_size=1024):
    """Create a Qdrant collection configured per 2025 best practices.

    Args:
        name: Collection name.
        vector_size: Embedding dimensionality (default 1024, matching
            snowflake-arctic-embed2).

    Returns:
        True when Qdrant acknowledges the creation; False on any error
        (the error is printed to stderr).
    """
    import urllib.request

    config = {
        "vectors": {
            "size": vector_size,
            "distance": "Cosine",
            "on_disk": True,  # Store vectors on disk to minimize RAM
            "quantization_config": {
                "binary": {
                    "always_ram": True  # Keep compressed vectors in RAM for fast search
                }
            }
        },
        "on_disk_payload": True,  # Store payload on disk
        "shard_number": 1,  # Single node setup
        "replication_factor": 1,  # Single copy (set to 2 for production with HA)
        "optimizers_config": {
            "indexing_threshold": 20000,  # Start indexing after 20k points
            "default_segment_number": 0,  # Fewer/larger segments for better throughput
            "deleted_threshold": 0.2,  # Vacuum when 20% deleted
            "vacuum_min_vector_number": 1000  # Min vectors before vacuum
        }
    }

    req = make_request(
        f"{QDRANT_URL}/collections/{name}",
        data=config,
        method="PUT"
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            # Qdrant replies {"result": true, ...} on success. Use `is True`
            # rather than `== True` (idiomatic identity check for the bool).
            return result.get("result") is True
    except Exception as e:
        print(f"Error creating collection {name}: {e}", file=sys.stderr)
        return False
def delete_collection(name):
    """Delete the named collection; return True when Qdrant reports ok."""
    import urllib.request

    request = make_request(f"{QDRANT_URL}/collections/{name}", method="DELETE")
    try:
        with urllib.request.urlopen(request, timeout=5) as response:
            reply = json.loads(response.read().decode())
            return reply.get("status") == "ok"
    except Exception as e:
        print(f"Error deleting collection {name}: {e}", file=sys.stderr)
        return False
def main():
    """Entry point: verify connectivity, then create/recreate each collection.

    Exits with status 1 when Qdrant is unreachable or any collection
    operation fails.
    """
    import urllib.request

    parser = argparse.ArgumentParser(description="Initialize all Qdrant collections with 2025 best practices")
    parser.add_argument("--recreate", action="store_true", help="Delete and recreate all collections")
    parser.add_argument("--force", action="store_true", help="Force recreate even with existing data")
    args = parser.parse_args()

    # Check Qdrant connection
    try:
        req = urllib.request.Request(f"{QDRANT_URL}/")
        with urllib.request.urlopen(req, timeout=3):
            pass
    except Exception as e:
        print(f"❌ Cannot connect to Qdrant at {QDRANT_URL}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"✅ Connected to Qdrant at {QDRANT_URL}\n")

    # Check if Ollama is available for embeddings (informational only).
    try:
        req = urllib.request.Request("http://localhost:11434/api/tags")
        with urllib.request.urlopen(req, timeout=3):
            ollama_status = "✅"
    except Exception:
        ollama_status = "⚠️"

    print(f"Ollama (localhost): {ollama_status} - Embeddings endpoint\n")

    created = []
    skipped = []
    errors = []
    recreated = []

    for name, config in COLLECTIONS.items():
        print(f"--- {name} ---")
        print(f"  Description: {config['description']}")

        exists = collection_exists(name)
        # True only after we actually drop a pre-existing collection; drives
        # the created-vs-recreated classification in the summary. (The old
        # check `name in [c for c in COLLECTIONS]` was always true, so every
        # creation under --recreate was mislabeled "Recreated".)
        was_deleted = False

        if exists:
            # Default so the --recreate branch below cannot hit a NameError
            # when the info lookup fails.
            points = 0
            info = get_collection_info(name)
            if info:
                actual_size = info.get("result", {}).get("config", {}).get("params", {}).get("vectors", {}).get("size", "?")
                points = info.get("result", {}).get("points_count", 0)
                on_disk = info.get("result", {}).get("config", {}).get("params", {}).get("vectors", {}).get("on_disk", False)

                print("  ℹ️ Existing collection:")
                print(f"    Points: {points}")
                print(f"    Vector size: {actual_size}")
                print(f"    On disk: {on_disk}")

            if args.recreate:
                # Refuse a destructive recreate unless --force is given.
                if points > 0 and not args.force:
                    print(f"  ⚠️ Collection has {points} points. Use --force to recreate with data loss.")
                    skipped.append(name)
                    continue

                print("  Deleting existing collection...")
                if delete_collection(name):
                    print("  ✅ Deleted")
                    exists = False
                    was_deleted = True
                else:
                    print("  ❌ Failed to delete", file=sys.stderr)
                    errors.append(name)
                    continue
            else:
                print("  ⚠️ Already exists, skipping (use --recreate to update)")
                skipped.append(name)
                continue

        if not exists:
            print("  Creating collection with 2025 best practices...")
            print("    - on_disk=True (vectors)")
            print("    - on_disk_payload=True")
            print("    - Binary quantization")
            print("    - Optimizer config")

            if create_collection(name, config["vector_size"]):
                print(f"  ✅ Created (vector size: {config['vector_size']})")
                if was_deleted:
                    recreated.append(name)
                else:
                    created.append(name)
            else:
                print("  ❌ Failed to create", file=sys.stderr)
                errors.append(name)
        print()

    # Summary
    print("=" * 50)
    print("SUMMARY:")
    if created:
        print(f"  Created: {', '.join(created)}")
    if recreated:
        print(f"  Recreated: {', '.join(recreated)}")
    if skipped:
        print(f"  Skipped: {', '.join(skipped)}")
    if errors:
        print(f"  Errors: {', '.join(errors)}")
        sys.exit(1)

    print("\n🎉 All collections ready with 2025 best practices!")
    print("\nCollections configured for snowflake-arctic-embed2 (1024 dims)")
    print("- kimi_memories: Personal memories (on_disk=True)")
    print("- kimi_kb: Knowledge base (on_disk=True)")
    print("- private_court_docs: Court documents (on_disk=True)")
    print("\nFeatures enabled:")
    print("  ✓ Vectors stored on disk (minimizes RAM)")
    print("  ✓ Payload stored on disk")
    print("  ✓ Binary quantization for fast search")
    print("  ✓ Optimized indexing thresholds")


if __name__ == "__main__":
    main()