#!/usr/bin/env python3
"""
Migrate memories from kimi_memories to memories_tr.

- Reads from kimi_memories (Qdrant)
- Cleans/strips noise (metadata, thinking tags)
- Stores to memories_tr (Qdrant)
- Keeps original kimi_memories intact
"""

import json
import re
import urllib.request
import urllib.error
from datetime import datetime
from typing import List, Dict, Any

QDRANT_URL = "http://10.0.0.40:6333"
SOURCE_COLLECTION = "kimi_memories"
TARGET_COLLECTION = "memories_tr"

# Noise patterns stripped by clean_content(); compiled once at import time
# because the function runs twice for every migrated point.
_METADATA_BLOCK_RE = re.compile(
    r'Conversation info \(untrusted metadata\):\s*```json\s*\{[\s\S]*?\}\s*```'
)
_THINKING_TAG_RE = re.compile(r'\[thinking:[^\]]*\]')
_TIMESTAMP_RE = re.compile(r'\[\w{3} \d{4}-\d{2}-\d{2} \d{2}:\d{2} [A-Z]{3}\]')
_EXCESS_NEWLINES_RE = re.compile(r'\n{3,}')


def clean_content(text: str) -> str:
    """Return *text* with known noise removed.

    Strips, in order: "Conversation info (untrusted metadata)" fenced JSON
    blocks, ``[thinking:...]`` tags and ``[Day YYYY-MM-DD HH:MM TZ]``
    timestamps. Runs of three or more newlines are then collapsed to two and
    surrounding whitespace is trimmed.

    Any falsy input (``None``, ``""``) yields ``""``.
    """
    if not text:
        return ""

    cleaned = _METADATA_BLOCK_RE.sub('', text)
    cleaned = _THINKING_TAG_RE.sub('', cleaned)
    cleaned = _TIMESTAMP_RE.sub('', cleaned)
    cleaned = _EXCESS_NEWLINES_RE.sub('\n\n', cleaned)
    return cleaned.strip()


def get_all_points(
    collection: str,
    *,
    with_payload: bool = True,
    with_vector: bool = True,
) -> List[Dict]:
    """Fetch the points of *collection* by paging through the scroll endpoint.

    Args:
        collection: Name of the Qdrant collection to read.
        with_payload: Ask the server to include each point's payload.
        with_vector: Ask the server to include each point's vector.

    Returns:
        The points collected so far. This is best-effort: a network or HTTP
        error is printed and ends the scan, so the list may be partial. At
        most 1000 pages of 100 points are read; a warning is printed when
        that cap is reached.
    """
    all_points: List[Dict] = []
    offset = None
    max_iterations = 1000
    page_size = 100

    for _ in range(max_iterations):
        scroll_data: Dict[str, Any] = {
            "limit": page_size,
            "with_payload": with_payload,
            "with_vector": with_vector,
        }
        if offset is not None:
            scroll_data["offset"] = offset

        req = urllib.request.Request(
            f"{QDRANT_URL}/collections/{collection}/points/scroll",
            data=json.dumps(scroll_data).encode(),
            headers={"Content-Type": "application/json"},
            method="POST"
        )

        try:
            with urllib.request.urlopen(req, timeout=60) as response:
                result = json.loads(response.read().decode())
        except OSError as e:
            # HTTPError/URLError, timeouts and connection resets all derive
            # from OSError; report and stop instead of crashing mid-scan.
            print(f"Error: {e}")
            break

        # "result" may be missing or null in an unexpected response body.
        page = result.get("result") or {}
        points = page.get("points", [])
        if not points:
            break

        all_points.extend(points)

        # A null next_page_offset marks the last page.
        offset = page.get("next_page_offset")
        if offset is None:
            break
    else:
        # Loop ran out of iterations without reaching the last page.
        print(
            f"Warning: stopped after {max_iterations} pages; "
            f"{collection} may contain more points"
        )

    return all_points


def store_points(collection: str, points: List[Dict]) -> int:
    """Upsert *points* into *collection* in batches of 100.

    Args:
        collection: Name of the target Qdrant collection.
        points: Point dicts (``id``, ``vector``, ``payload``) to upload.

    Returns:
        Number of points in batches the server answered with HTTP 200.
        Failed batches are reported on stdout and skipped, so the return
        value can be lower than ``len(points)``.
    """
    if not points:
        return 0

    batch_size = 100
    stored = 0

    for start in range(0, len(points), batch_size):
        batch = points[start:start + batch_size]

        req = urllib.request.Request(
            # wait=true makes the server apply the upsert before replying, so
            # a count taken right after the upload reflects these points.
            f"{QDRANT_URL}/collections/{collection}/points?wait=true",
            data=json.dumps({"points": batch}).encode(),
            headers={"Content-Type": "application/json"},
            method="PUT"
        )

        try:
            with urllib.request.urlopen(req, timeout=60) as response:
                if response.status == 200:
                    stored += len(batch)
        except OSError as e:
            # Covers HTTP errors as well as connection failures and timeouts;
            # keep going so one bad batch does not abort the whole upload.
            print(f"Error storing batch: {e}")

    return stored


def migrate_point(point: Dict) -> Dict:
    """Build the cleaned copy of a single point.

    The ``user_message`` and ``ai_response`` payload fields are passed
    through clean_content(); all other payload fields are kept, and
    ``migrated_from`` / ``migrated_at`` markers are added. The id and vector
    are carried over unchanged. The input point is not modified.
    """
    # The key can be present with a null value; treat that as no payload.
    payload = point.get("payload") or {}

    cleaned_payload = {
        **payload,
        "user_message": clean_content(payload.get("user_message", "")),
        "ai_response": clean_content(payload.get("ai_response", "")),
        "migrated_from": "kimi_memories",
        "migrated_at": datetime.now().isoformat()
    }

    return {
        "id": point.get("id"),
        "vector": point.get("vector"),
        "payload": cleaned_payload
    }


def main():
    """Run the migration: read, clean, store, verify, and print a summary."""
    print("=" * 60)
    print("Memory Migration: kimi_memories → memories_tr")
    print("=" * 60)
    print()

    # Check source
    print(f"📥 Reading from {SOURCE_COLLECTION}...")
    source_points = get_all_points(SOURCE_COLLECTION)
    print(f" Found {len(source_points)} points")

    if not source_points:
        print("❌ No points to migrate")
        return

    # Clean points
    print(f"\n🧹 Cleaning {len(source_points)} points...")
    cleaned_points = [migrate_point(p) for p in source_points]
    print(" ✓ Cleaned")

    # Store to target
    print(f"\n💾 Storing to {TARGET_COLLECTION}...")
    stored = store_points(TARGET_COLLECTION, cleaned_points)
    print(f" ✓ Stored {stored} points")

    # Verify: only the number of points matters here, so skip the payloads
    # and vectors instead of downloading the whole collection again.
    print("\n🔍 Verifying...")
    target_points = get_all_points(
        TARGET_COLLECTION, with_payload=False, with_vector=False
    )
    print(f" Target now has {len(target_points)} points")

    # Summary
    print()
    print("=" * 60)
    print("Migration Summary:")
    print(f" Source ({SOURCE_COLLECTION}): {len(source_points)} points")
    print(f" Target ({TARGET_COLLECTION}): {len(target_points)} points")
    print(f" Cleaned & migrated: {stored} points")
    print("=" * 60)

    if stored == len(source_points):
        print("\n✅ Migration complete!")
    else:
        print(f"\n⚠️ Warning: Only migrated {stored}/{len(source_points)} points")


if __name__ == "__main__":
    main()