Initial commit: workspace setup with skills, memory, config
This commit is contained in:
Binary file not shown.
Binary file not shown.
273
skills/qdrant-memory/scripts/activity_log.py
Executable file
273
skills/qdrant-memory/scripts/activity_log.py
Executable file
@@ -0,0 +1,273 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Shared Activity Log for Kimi and Max
|
||||
Prevents duplicate work by logging actions to Qdrant
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Distance, VectorParams, PointStruct
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "activity_log"
|
||||
VECTOR_SIZE = 768 # nomic-embed-text
|
||||
|
||||
# Embedding function (simple keyword-based for now, or use nomic)
|
||||
def simple_embed(text: str) -> list[float]:
    """Deterministic bag-of-words embedding for coarse similarity search.

    Each of the first 100 whitespace-separated, lowercased words is hashed
    into one of VECTOR_SIZE buckets; the resulting count vector is
    L2-normalized so cosine distance behaves sensibly.  In production this
    should be replaced by a real model (e.g. nomic-embed-text via API).

    Args:
        text: Arbitrary text to embed.

    Returns:
        A VECTOR_SIZE-long list of floats with unit L2 norm, or all zeros
        when `text` contains no words.
    """
    words = text.lower().split()
    vector = [0.0] * VECTOR_SIZE
    for word in words[:100]:  # Limit to first 100 words
        # Use a stable hash: the builtin hash() is randomized per process
        # (PYTHONHASHSEED), so vectors stored by one run would never match
        # queries embedded by a later run.
        digest = hashlib.md5(word.encode("utf-8")).digest()
        bucket = int.from_bytes(digest[:8], "big") % VECTOR_SIZE
        vector[bucket] += 1.0
    # Normalize to unit length
    norm = sum(x * x for x in vector) ** 0.5
    if norm > 0:
        vector = [x / norm for x in vector]
    return vector
|
||||
|
||||
def init_collection(client: QdrantClient):
    """Ensure the activity_log collection exists, creating it on demand."""
    existing = {c.name for c in client.get_collections().collections}
    if COLLECTION_NAME in existing:
        return
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
    )
    print(f"Created collection: {COLLECTION_NAME}")
|
||||
|
||||
def log_activity(
    agent: str,
    action_type: str,
    description: str,
    affected_files: Optional[list] = None,
    status: str = "completed",
    metadata: Optional[dict] = None
) -> str:
    """
    Record one unit of work in the shared activity log.

    Args:
        agent: "Kimi" or "Max"
        action_type: e.g., "cron_created", "file_edited", "config_changed", "task_completed"
        description: Human-readable description of what was done
        affected_files: List of file paths or systems affected
        status: "completed", "in_progress", "blocked", "failed"
        metadata: Additional key-value pairs

    Returns:
        activity_id (UUID string) of the stored point
    """
    client = QdrantClient(url=QDRANT_URL)
    init_collection(client)

    files = affected_files or []
    activity_id = str(uuid.uuid4())
    timestamp = datetime.now(timezone.utc).isoformat()
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    # Embed agent, action type, description and file list together so all
    # of them contribute to searchability.
    vector = simple_embed(" ".join([agent, action_type, description, " ".join(files)]))

    point = PointStruct(
        id=activity_id,
        vector=vector,
        payload={
            "agent": agent,
            "action_type": action_type,
            "description": description,
            "affected_files": files,
            "status": status,
            "timestamp": timestamp,
            "date": date_str,
            "activity_id": activity_id,
            "metadata": metadata or {},
        },
    )
    client.upsert(collection_name=COLLECTION_NAME, points=[point])

    return activity_id
|
||||
|
||||
def get_recent_activities(
    agent: Optional[str] = None,
    action_type: Optional[str] = None,
    hours: int = 24,
    limit: int = 50
) -> list[dict]:
    """
    Query recent activities, newest first.

    Args:
        agent: Filter by agent name ("Kimi" or "Max") or None for both
        action_type: Filter by action type or None for all
        hours: Look back this many hours
        limit: Max results

    Returns:
        Up to `limit` payload dicts sorted by timestamp descending.
    """
    client = QdrantClient(url=QDRANT_URL)

    # Get a batch of points and filter client-side (Qdrant payload
    # filtering can be tricky).  Fine for small collections; for large
    # ones, use scroll with a server-side filter.
    all_points = client.scroll(
        collection_name=COLLECTION_NAME,
        limit=1000  # Get recent batch
    )[0]

    results = []
    cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)

    for point in all_points:
        payload = point.payload
        ts = payload.get("timestamp", "")
        try:
            point_time = datetime.fromisoformat(ts.replace("Z", "+00:00")).timestamp()
        except (AttributeError, TypeError, ValueError):
            # Skip records whose timestamp is missing or malformed.
            # Previously a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit.
            continue

        if point_time < cutoff:
            continue

        if agent and payload.get("agent") != agent:
            continue

        if action_type and payload.get("action_type") != action_type:
            continue

        results.append(payload)

    # Sort by timestamp descending (ISO-8601 strings sort chronologically)
    results.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
    return results[:limit]
|
||||
|
||||
def search_activities(query: str, limit: int = 10) -> list[dict]:
    """Semantic search across activity descriptions; returns payload dicts."""
    client = QdrantClient(url=QDRANT_URL)

    hits = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=simple_embed(query),
        limit=limit,
    )

    payloads = []
    for hit in hits:
        payloads.append(hit.payload)
    return payloads
|
||||
|
||||
def check_for_duplicates(action_type: str, description_keywords: str, hours: int = 6) -> bool:
    """
    Check if similar work was recently done.

    Args:
        action_type: Action type to compare against
        description_keywords: Space-separated keywords; a duplicate is an
            activity whose description contains ALL of them
        hours: Look back this many hours

    Returns:
        True if a duplicate is detected, False otherwise.
    """
    keywords = description_keywords.lower().split()
    if not keywords:
        # Guard: all() over an empty keyword list is vacuously True, which
        # would report EVERY recent activity as a duplicate.
        return False

    recent = get_recent_activities(action_type=action_type, hours=hours)

    for activity in recent:
        desc = activity.get("description", "").lower()
        if all(kw in desc for kw in keywords):
            print(f"⚠️  Duplicate detected: {activity['agent']} did similar work {activity['timestamp']}")
            print(f"   Description: {activity['description']}")
            return True

    return False
|
||||
|
||||
def main():
    """CLI entry point: log/recent/search/check subcommands for the shared activity log."""
    parser = argparse.ArgumentParser(description="Shared Activity Log for Kimi/Max")
    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # Log command
    log_parser = subparsers.add_parser("log", help="Log an activity")
    log_parser.add_argument("--agent", required=True, choices=["Kimi", "Max"], help="Which agent performed the action")
    log_parser.add_argument("--action", required=True, help="Action type (e.g., cron_created, file_edited)")
    log_parser.add_argument("--description", required=True, help="What was done")
    log_parser.add_argument("--files", nargs="*", help="Files/systems affected")
    log_parser.add_argument("--status", default="completed", choices=["completed", "in_progress", "blocked", "failed"])
    log_parser.add_argument("--check-duplicate", action="store_true", help="Check for duplicates before logging")
    log_parser.add_argument("--duplicate-keywords", help="Keywords to check for duplicates (if different from description)")

    # Recent command
    recent_parser = subparsers.add_parser("recent", help="Show recent activities")
    recent_parser.add_argument("--agent", choices=["Kimi", "Max"], help="Filter by agent")
    recent_parser.add_argument("--action", help="Filter by action type")
    recent_parser.add_argument("--hours", type=int, default=24, help="Hours to look back")
    recent_parser.add_argument("--limit", type=int, default=20, help="Max results")

    # Search command
    search_parser = subparsers.add_parser("search", help="Search activities")
    search_parser.add_argument("query", help="Search query")
    search_parser.add_argument("--limit", type=int, default=10)

    # Check command (exit code signals duplicate: 1 = duplicate found)
    check_parser = subparsers.add_parser("check", help="Check for duplicate work")
    check_parser.add_argument("--action", required=True, help="Action type")
    check_parser.add_argument("--keywords", required=True, help="Keywords to check")
    check_parser.add_argument("--hours", type=int, default=6, help="Hours to look back")

    args = parser.parse_args()

    if args.command == "log":
        if args.check_duplicate:
            # Fall back to the full description when no explicit keywords given
            keywords = args.duplicate_keywords or args.description
            if check_for_duplicates(args.action, keywords):
                # Interactive confirmation before logging duplicate work
                response = input("Proceed anyway? (y/n): ")
                if response.lower() != "y":
                    print("Cancelled.")
                    sys.exit(0)

        activity_id = log_activity(
            agent=args.agent,
            action_type=args.action,
            description=args.description,
            affected_files=args.files,
            status=args.status
        )
        print(f"✓ Logged activity: {activity_id}")

    elif args.command == "recent":
        activities = get_recent_activities(
            agent=args.agent,
            action_type=args.action,
            hours=args.hours,
            limit=args.limit
        )

        print(f"\nRecent activities (last {args.hours}h):\n")
        for a in activities:
            # Icons distinguish the two agents and the activity status
            agent_icon = "🤖" if a["agent"] == "Max" else "🎙️"
            status_icon = {
                "completed": "✓",
                "in_progress": "◐",
                "blocked": "✗",
                "failed": "⚠"
            }.get(a["status"], "?")

            print(f"{agent_icon} [{a['timestamp'][:19]}] {status_icon} {a['action_type']}")
            print(f"   {a['description']}")
            if a['affected_files']:
                print(f"   Files: {', '.join(a['affected_files'])}")
            print()

    elif args.command == "search":
        results = search_activities(args.query, args.limit)

        print(f"\nSearch results for '{args.query}':\n")
        for r in results:
            print(f"[{r['agent']}] {r['action_type']}: {r['description']}")
            print(f"   {r['timestamp'][:19]} | Status: {r['status']}")
            print()

    elif args.command == "check":
        # Exit status is the machine-readable result: 1 = duplicate found
        is_dup = check_for_duplicates(args.action, args.keywords, args.hours)
        sys.exit(1 if is_dup else 0)

    else:
        parser.print_help()


if __name__ == "__main__":
    main()
|
||||
191
skills/qdrant-memory/scripts/agent_chat.py
Executable file
191
skills/qdrant-memory/scripts/agent_chat.py
Executable file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Agent Messaging System - Redis Streams
|
||||
Kimi and Max shared communication channel
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import time
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import redis
|
||||
|
||||
REDIS_HOST = "10.0.0.36"
|
||||
REDIS_PORT = 6379
|
||||
STREAM_NAME = "agent-messages"
|
||||
LAST_READ_KEY = "agent:last_read:{agent}"
|
||||
|
||||
class AgentChat:
    """Wrapper around a single Redis stream used as a shared agent chat.

    Each agent tracks its own read position in a per-agent Redis key
    (LAST_READ_KEY), so `read_new` only returns messages it has not seen.
    """

    def __init__(self, agent_name):
        # agent_name is used both as message author and as the key suffix
        # for this agent's read cursor.
        self.agent = agent_name
        self.r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=True)

    def send(self, msg_type, message, reply_to=None, from_user=False):
        """Send a message to the stream; returns the Redis stream entry ID."""
        # Redis streams only store flat string fields, hence the ""/str
        # normalization below.
        entry = {
            "agent": self.agent,
            "type": msg_type,  # idea, question, update, reply
            "message": message,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "reply_to": reply_to or "",
            "from_user": str(from_user).lower()  # "true" if from Rob, "false" if from agent
        }

        msg_id = self.r.xadd(STREAM_NAME, entry)
        print(f"[{self.agent}] Sent: {msg_id}")
        return msg_id

    def read_new(self, block_ms=1000):
        """Read messages since this agent's last recorded read position."""
        # "0" means "from the beginning" on first ever read.
        last_id = self.r.get(LAST_READ_KEY.format(agent=self.agent)) or "0"

        result = self.r.xread(
            {STREAM_NAME: last_id},
            block=block_ms
        )

        if not result:
            return []

        messages = []
        for stream_name, entries in result:
            for msg_id, data in entries:
                messages.append({"id": msg_id, **data})
                # Update last read position
                self.r.set(LAST_READ_KEY.format(agent=self.agent), msg_id)

        return messages

    def read_all(self, count=50):
        """Read last N messages regardless of read status (newest first)."""
        entries = self.r.xrevrange(STREAM_NAME, count=count)

        messages = []
        for msg_id, data in entries:
            messages.append({"id": msg_id, **data})

        return messages

    def read_since(self, hours=24):
        """Read messages from the last N hours."""
        cutoff = time.time() - (hours * 3600)
        cutoff_ms = int(cutoff * 1000)

        # Get messages since cutoff (approximate using ID which is timestamp-based)
        entries = self.r.xrange(STREAM_NAME, min=f"{cutoff_ms}-0", count=1000)

        messages = []
        for msg_id, data in entries:
            messages.append({"id": msg_id, **data})

        return messages

    def wait_for_reply(self, reply_to_id, timeout_sec=30):
        """Block until a reply to a specific message arrives, or timeout.

        Returns the reply message dict, or None if no reply arrived within
        `timeout_sec`.  NOTE(review): each xread call blocks for up to the
        full timeout, so the total wait can exceed timeout_sec — confirm
        whether that is acceptable for callers.
        """
        start = time.time()
        last_check = "0"

        while time.time() - start < timeout_sec:
            result = self.r.xread({STREAM_NAME: last_check}, block=timeout_sec*1000)

            if result:
                for stream_name, entries in result:
                    for msg_id, data in entries:
                        last_check = msg_id
                        if data.get("reply_to") == reply_to_id:
                            return {"id": msg_id, **data}

            time.sleep(0.5)

        return None

    def format_message(self, msg):
        """Pretty print a message as a single line with icons."""
        ts = msg.get("timestamp", "")[11:19]  # HH:MM:SS only
        agent = msg.get("agent", "?")
        msg_type = msg.get("type", "?")
        text = msg.get("message", "")
        reply_to = msg.get("reply_to", "")
        from_user = msg.get("from_user", "false") == "true"

        icon = "🤖" if agent == "Max" else "🎙️"
        type_icon = {
            "idea": "💡",
            "question": "❓",
            "update": "📢",
            "reply": "↩️"
        }.get(msg_type, "•")

        # Show 📝 if message is from Rob (relayed by agent), otherwise show agent icon only
        source_icon = "📝" if from_user else icon

        reply_info = f" [reply to {reply_to[:8]}...]" if reply_to else ""
        return f"[{ts}] {source_icon} {agent} {type_icon} {text}{reply_info}"
|
||||
|
||||
def main():
    """CLI entry point: send/read messages on the shared agent stream."""
    parser = argparse.ArgumentParser(description="Agent messaging via Redis Streams")
    parser.add_argument("--agent", required=True, choices=["Kimi", "Max"], help="Your agent name")

    subparsers = parser.add_subparsers(dest="command", help="Command")

    # Send command
    send_p = subparsers.add_parser("send", help="Send a message")
    send_p.add_argument("--type", default="update", choices=["idea", "question", "update", "reply"])
    send_p.add_argument("--message", "-m", required=True, help="Message text")
    send_p.add_argument("--reply-to", help="Reply to message ID")
    send_p.add_argument("--from-user", action="store_true", help="Mark as message from Rob (not from agent)")

    # Read command
    read_p = subparsers.add_parser("read", help="Read messages")
    read_p.add_argument("--new", action="store_true", help="Only unread messages")
    read_p.add_argument("--all", action="store_true", help="Last 50 messages")
    read_p.add_argument("--since", type=int, help="Messages from last N hours")
    read_p.add_argument("--wait", action="store_true", help="Wait for new messages (blocking)")

    args = parser.parse_args()

    chat = AgentChat(args.agent)

    if args.command == "send":
        msg_id = chat.send(args.type, args.message, args.reply_to, args.from_user)
        print(f"Message ID: {msg_id}")

    elif args.command == "read":
        if args.new or args.wait:
            if args.wait:
                # Blocking follow mode: poll in 5s blocks until Ctrl+C
                print("Waiting for messages... (Ctrl+C to stop)")
                try:
                    while True:
                        msgs = chat.read_new(block_ms=5000)
                        for m in msgs:
                            print(chat.format_message(m))
                except KeyboardInterrupt:
                    print("\nStopped.")
            else:
                msgs = chat.read_new()
                for m in msgs:
                    print(chat.format_message(m))
                if not msgs:
                    print("No new messages.")

        elif args.since:
            msgs = chat.read_since(args.since)
            for m in msgs:
                print(chat.format_message(m))
            if not msgs:
                print(f"No messages in last {args.since} hours.")

        else:  # default --all
            msgs = chat.read_all()
            for m in reversed(msgs):  # Chronological order
                print(chat.format_message(m))
            if not msgs:
                print("No messages in stream.")

    else:
        parser.print_help()


if __name__ == "__main__":
    main()
|
||||
181
skills/qdrant-memory/scripts/agent_check.py
Executable file
181
skills/qdrant-memory/scripts/agent_check.py
Executable file
@@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Check agent messages from Redis stream
|
||||
Usage: agent_check.py [--list N] [--check] [--last-minutes M]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Add parent to path for imports
|
||||
sys.path.insert(0, '/root/.openclaw/workspace/skills/qdrant-memory')
|
||||
|
||||
try:
|
||||
import redis
|
||||
except ImportError:
|
||||
print("❌ Redis module not available")
|
||||
sys.exit(1)
|
||||
|
||||
REDIS_HOST = "10.0.0.36"
|
||||
REDIS_PORT = 6379
|
||||
STREAM_KEY = "agent-messages"
|
||||
LAST_CHECKED_KEY = "agent:last_check_timestamp"
|
||||
|
||||
def get_redis_client():
    """Return a Redis connection object, or None if it cannot be created.

    Note: redis.Redis() itself does not open a socket; connection errors
    will surface on the first command issued through the client.
    """
    try:
        client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            decode_responses=True,
            socket_connect_timeout=5,
            socket_timeout=5,
        )
    except Exception as exc:
        print(f"❌ Redis connection failed: {exc}")
        return None
    return client
|
||||
|
||||
def get_messages_since(last_check=None, count=10):
    """Get messages from the Redis stream since last check.

    Args:
        last_check: Epoch-milliseconds cutoff; only messages strictly newer
            than this are returned. None returns the full fetched batch.
        count: How many of the most recent entries to fetch from the stream
            before applying the cutoff.
            NOTE(review): because filtering happens AFTER fetching only the
            last `count` entries, more than `count` new messages since
            `last_check` would be silently missed — confirm intended.

    Returns:
        List of {'id', 'time', 'data'} dicts, newest first (xrevrange order).
    """
    r = get_redis_client()
    if not r:
        return []

    try:
        # Get last N messages from stream
        messages = r.xrevrange(STREAM_KEY, count=count)

        result = []
        for msg_id, msg_data in messages:
            # Parse message data
            data = {}
            for k, v in msg_data.items():
                data[k] = v

            # Extract timestamp from message ID (format: "<ms>-<seq>")
            timestamp_ms = int(msg_id.split('-')[0])
            msg_time = datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)

            # Filter by last check if provided
            if last_check:
                if timestamp_ms <= last_check:
                    continue

            result.append({
                'id': msg_id,
                'time': msg_time,
                'data': data
            })

        return result
    except Exception as e:
        print(f"❌ Error reading stream: {e}")
        return []
|
||||
|
||||
def update_last_check():
    """Persist 'now' (epoch milliseconds) as the last-check marker.

    Returns:
        True on success, False if Redis is unavailable or the write fails.
    """
    client = get_redis_client()
    if not client:
        return False

    try:
        client.set(LAST_CHECKED_KEY, str(int(time.time() * 1000)))
    except Exception as exc:
        print(f"❌ Error updating timestamp: {exc}")
        return False
    return True
|
||||
|
||||
def get_last_check_time():
    """Return the stored last-check timestamp in epoch milliseconds, or None.

    Returns None when Redis is unavailable, the key is unset, or the stored
    value cannot be read/parsed.
    """
    r = get_redis_client()
    if not r:
        return None

    try:
        last = r.get(LAST_CHECKED_KEY)
        if last:
            return int(last)
        return None
    except Exception:
        # Treat any Redis/parse error as "never checked".  Previously a
        # bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        return None
|
||||
|
||||
def format_message(msg):
    """Render one stream message as a two-line human-readable string.

    Args:
        msg: Dict with 'time' (aware datetime) and 'data' (field dict with
            optional 'sender', 'recipient', 'type', 'content').

    Returns:
        "[<ts> UTC] <sender> → <recipient> (<type>):\n  <content>" with the
        content truncated to 200 characters (ellipsis appended when cut).
    """
    stamp = msg['time'].strftime('%Y-%m-%d %H:%M:%S UTC')
    fields = msg['data']

    sender = fields.get('sender', 'unknown')
    recipient = fields.get('recipient', 'all')
    kind = fields.get('type', 'message')
    body = fields.get('content', '')

    # Truncate long bodies so one message stays readable in a terminal
    snippet = body[:200] + ('...' if len(body) > 200 else '')
    return f"[{stamp}] {sender} → {recipient} ({kind}):\n  {snippet}"
|
||||
|
||||
def main():
    """CLI entry point: list/check/report messages from the agent stream."""
    parser = argparse.ArgumentParser(description="Check agent messages from Redis")
    parser.add_argument("--list", "-l", type=int, metavar="N", help="List last N messages")
    parser.add_argument("--check", "-c", action="store_true", help="Check for new messages since last check")
    parser.add_argument("--last-minutes", "-m", type=int, metavar="M", help="Check messages from last M minutes")
    parser.add_argument("--mark-read", action="store_true", help="Update last check timestamp after reading")

    args = parser.parse_args()

    if args.check:
        # Explicit check: only advances the read marker if --mark-read given
        last_check = get_last_check_time()
        messages = get_messages_since(last_check)

        if messages:
            print(f"🔔 {len(messages)} new message(s):")
            for msg in reversed(messages):  # Oldest first
                print(format_message(msg))
                print()
        else:
            print("✅ No new messages")

        if args.mark_read:
            update_last_check()
            print("📌 Last check time updated")

    elif args.last_minutes:
        # Time-window query, independent of the stored read marker
        since_ms = int((time.time() - args.last_minutes * 60) * 1000)
        messages = get_messages_since(since_ms)

        if messages:
            print(f"📨 {len(messages)} message(s) from last {args.last_minutes} minutes:")
            for msg in reversed(messages):
                print(format_message(msg))
                print()
        else:
            print(f"✅ No messages in last {args.last_minutes} minutes")

    elif args.list:
        messages = get_messages_since(count=args.list)

        if messages:
            print(f"📜 Last {len(messages)} message(s):")
            for msg in reversed(messages):
                print(format_message(msg))
                print()
        else:
            print("📭 No messages in stream")

    else:
        # Default: check for new messages (and advance the marker, unlike --check)
        last_check = get_last_check_time()
        messages = get_messages_since(last_check)

        if messages:
            print(f"🔔 {len(messages)} new message(s):")
            for msg in reversed(messages):
                print(format_message(msg))
                print()
            update_last_check()
        else:
            print("✅ No new messages")


if __name__ == "__main__":
    main()
|
||||
275
skills/qdrant-memory/scripts/api_scraper.py
Executable file
275
skills/qdrant-memory/scripts/api_scraper.py
Executable file
@@ -0,0 +1,275 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
API Scraper - REST API client with pagination support
|
||||
Usage: api_scraper.py https://api.example.com/items --domain "API" --path "Endpoints/Items"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from scrape_to_kb import chunk_text, get_embedding, compute_checksum, store_in_kb
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "knowledge_base"
|
||||
|
||||
class APIScraper:
    """Small urllib-based REST client with pagination and text formatting."""

    def __init__(self, base_url, headers=None, rate_limit=0):
        # base_url is prepended to relative endpoints in paginate()
        self.base_url = base_url
        self.headers = headers or {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'Accept': 'application/json'
        }
        self.rate_limit = rate_limit  # seconds between requests

    def fetch(self, url, params=None):
        """Fetch JSON from API; returns the parsed object, or None on error."""
        if params:
            import urllib.parse
            query = urllib.parse.urlencode(params)
            # Append with '?' or '&' depending on whether url already has a query
            url = f"{url}?{query}" if '?' not in url else f"{url}&{query}"

        req = urllib.request.Request(url, headers=self.headers)

        try:
            with urllib.request.urlopen(req, timeout=30) as response:
                return json.loads(response.read().decode())
        # NOTE(review): urllib.error is never imported explicitly; this works
        # only because importing urllib.request pulls it in — confirm/add
        # `import urllib.error` at module top.
        except urllib.error.HTTPError as e:
            print(f"❌ HTTP {e.code}: {e.reason}", file=sys.stderr)
            return None
        except Exception as e:
            print(f"❌ Error: {e}", file=sys.stderr)
            return None

    def paginate(self, endpoint, page_param="page", size_param="limit",
                 size=100, max_pages=None, data_key=None):
        """Fetch paginated results until an empty/short page or max_pages.

        Returns the concatenated list of items across all fetched pages.
        """
        all_data = []
        page = 1

        while True:
            params = {page_param: page, size_param: size}
            url = f"{self.base_url}{endpoint}" if not endpoint.startswith('http') else endpoint

            print(f"📄 Fetching page {page}...")
            data = self.fetch(url, params)

            if not data:
                break

            # Extract items from response
            if data_key:
                items = data.get(data_key, [])
            elif isinstance(data, list):
                items = data
            else:
                # Try common keys
                for key in ['data', 'items', 'results', 'records', 'docs']:
                    if key in data:
                        items = data[key]
                        break
                else:
                    items = [data]  # Single item

            if not items:
                break

            all_data.extend(items)

            # Check for more pages
            if max_pages and page >= max_pages:
                print(f"  Reached max pages ({max_pages})")
                break

            # Check if we got less than requested (last page)
            if len(items) < size:
                break

            page += 1

            if self.rate_limit:
                import time
                time.sleep(self.rate_limit)

        return all_data

    def format_for_kb(self, items, format_template=None):
        """Format API items as text for the knowledge base.

        Items are rendered individually (via `format_template.format(**item)`
        when given, else auto-formatted) and joined with '---' separators.
        """
        if not items:
            return ""

        parts = []

        for i, item in enumerate(items):
            if format_template:
                # Use custom template; fall back to raw JSON if the template
                # references a key the item lacks
                try:
                    text = format_template.format(**item, index=i+1)
                except KeyError:
                    text = json.dumps(item, indent=2)
            else:
                # Auto-format
                text = self._auto_format(item)

            parts.append(text)

        return "\n\n---\n\n".join(parts)

    def _auto_format(self, item):
        """Auto-format a JSON item as readable markdown-ish text."""
        if isinstance(item, str):
            return item

        if not isinstance(item, dict):
            return json.dumps(item, indent=2)

        parts = []

        # Title/Name first (only the first matching key is used)
        for key in ['name', 'title', 'id', 'key']:
            if key in item:
                parts.append(f"# {item[key]}")
                break

        # Description/summary (only the first matching key is used)
        for key in ['description', 'summary', 'content', 'body', 'text']:
            if key in item:
                parts.append(f"\n{item[key]}")
                break

        # Other fields, skipping the ones already rendered above
        skip = ['name', 'title', 'id', 'key', 'description', 'summary', 'content', 'body', 'text']
        for key, value in item.items():
            if key in skip:
                continue
            if value is None:
                continue
            if isinstance(value, (list, dict)):
                value = json.dumps(value, indent=2)
            parts.append(f"\n**{key}:** {value}")

        return "\n".join(parts)
|
||||
|
||||
def main():
    """CLI entry point: fetch API data, format it, and chunk it into the KB."""
    parser = argparse.ArgumentParser(description="Scrape REST API to knowledge base")
    parser.add_argument("url", help="API endpoint URL")
    parser.add_argument("--domain", required=True, help="Knowledge domain")
    parser.add_argument("--path", required=True, help="Hierarchical path")
    parser.add_argument("--paginate", action="store_true", help="Enable pagination")
    parser.add_argument("--page-param", default="page", help="Page parameter name")
    parser.add_argument("--size-param", default="limit", help="Page size parameter name")
    parser.add_argument("--size", type=int, default=100, help="Items per page")
    parser.add_argument("--max-pages", type=int, help="Max pages to fetch")
    parser.add_argument("--data-key", help="Key containing data array in response")
    parser.add_argument("--header", action='append', nargs=2, metavar=('KEY', 'VALUE'),
                        help="Custom headers (e.g., --header Authorization 'Bearer token')")
    parser.add_argument("--format", help="Python format string for item display")
    parser.add_argument("--category", default="reference")
    parser.add_argument("--content-type", default="api_data")
    parser.add_argument("--subjects", help="Comma-separated subjects")
    parser.add_argument("--title", help="Content title")
    parser.add_argument("--output", "-o", help="Save to JSON file instead of KB")
    parser.add_argument("--rate-limit", type=float, default=0.5,
                        help="Seconds between requests (default: 0.5)")

    args = parser.parse_args()

    # Build headers (custom --header pairs override/extend the defaults)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
        'Accept': 'application/json'
    }
    if args.header:
        for key, value in args.header:
            headers[key] = value

    scraper = APIScraper(args.url, headers=headers, rate_limit=args.rate_limit)

    print(f"🔌 API: {args.url}")
    print(f"🏷️  Domain: {args.domain}")
    print(f"📂 Path: {args.path}")

    # Fetch data
    if args.paginate:
        print("📄 Pagination enabled\n")
        items = scraper.paginate(
            args.url,
            page_param=args.page_param,
            size_param=args.size_param,
            size=args.size,
            max_pages=args.max_pages,
            data_key=args.data_key
        )
    else:
        print("📄 Single request\n")
        data = scraper.fetch(args.url)
        # Same item-extraction logic as paginate(): explicit key, raw list,
        # or wrap a single object
        if data_key := args.data_key:
            items = data.get(data_key, []) if data else []
        elif isinstance(data, list):
            items = data
        else:
            items = [data] if data else []

    if not items:
        print("❌ No data fetched", file=sys.stderr)
        sys.exit(1)

    print(f"✓ Fetched {len(items)} items")

    if args.output:
        # Raw-dump mode: skip formatting/chunking/KB storage entirely
        with open(args.output, 'w') as f:
            json.dump(items, f, indent=2)
        print(f"💾 Saved raw data to {args.output}")
        return

    # Format for KB
    text = scraper.format_for_kb(items, args.format)

    print(f"📝 Formatted: {len(text)} chars")

    if len(text) < 200:
        print("❌ Content too short", file=sys.stderr)
        sys.exit(1)

    chunks = chunk_text(text)
    print(f"🧩 Chunks: {len(chunks)}")

    subjects = [s.strip() for s in args.subjects.split(",")] if args.subjects else []
    checksum = compute_checksum(text)
    title = args.title or f"API Data from {args.url}"

    print("💾 Storing...")
    stored = 0
    for i, chunk in enumerate(chunks):
        chunk_metadata = {
            "domain": args.domain,
            "path": f"{args.path}/chunk-{i+1}",
            "subjects": subjects,
            "category": args.category,
            "content_type": args.content_type,
            "title": f"{title} (part {i+1}/{len(chunks)})",
            "checksum": checksum,
            "source_url": args.url,
            "date_added": datetime.now().strftime("%Y-%m-%d"),
            "chunk_index": i + 1,
            "total_chunks": len(chunks),
            "text_preview": chunk[:200] + "..." if len(chunk) > 200 else chunk,
            "scraper_type": "api_rest",
            "item_count": len(items),
            "api_endpoint": args.url
        }

        if store_in_kb(chunk, chunk_metadata):
            stored += 1
            print(f"  ✓ Chunk {i+1}")

    print(f"\n🎉 Stored {stored}/{len(chunks)} chunks")
    print(f"   Source: {args.url}")
    print(f"   Items: {len(items)}")


if __name__ == "__main__":
    main()
|
||||
301
skills/qdrant-memory/scripts/auto_memory.py
Executable file
301
skills/qdrant-memory/scripts/auto_memory.py
Executable file
@@ -0,0 +1,301 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Auto-memory management with proactive context retrieval
|
||||
Usage: auto_memory.py store "text" [--importance medium] [--tags tag1,tag2]
|
||||
auto_memory.py search "query" [--limit 3]
|
||||
auto_memory.py should_store "conversation_snippet"
|
||||
auto_memory.py context "current_topic" [--min-score 0.6]
|
||||
auto_memory.py proactive "user_message" [--auto-include]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
WORKSPACE = "/root/.openclaw/workspace"
|
||||
QDRANT_SKILL = f"{WORKSPACE}/skills/qdrant-memory/scripts"
|
||||
|
||||
def store_memory(text, importance="medium", tags=None, confidence="high",
                 source_type="user", verified=True, expires=None):
    """Store a memory via the store_memory.py skill script.

    Args:
        text: Memory text to persist.
        importance: "low" | "medium" | "high".
        tags: Optional list of tag strings (joined with commas).
        confidence: Confidence label forwarded to the script.
        source_type: Origin of the memory (e.g. "user").
        verified: Whether to pass the --verified flag.
        expires: Optional expiration date string (YYYY-MM-DD).

    Returns:
        True if the helper script exited with status 0, False otherwise
        (including when the script is missing or the call times out).
    """
    cmd = [
        "python3", f"{QDRANT_SKILL}/store_memory.py",
        text,
        "--importance", importance,
        "--confidence", confidence,
        "--source-type", source_type,
    ]
    if verified:
        cmd.append("--verified")
    if tags:
        cmd.extend(["--tags", ",".join(tags)])
    if expires:
        cmd.extend(["--expires", expires])

    # Match search_memories(): bound the subprocess call and fail soft
    # instead of letting FileNotFoundError/TimeoutExpired propagate.
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except (OSError, subprocess.TimeoutExpired):
        return False
    return result.returncode == 0
|
||||
|
||||
def search_memories(query, limit=3, min_score=0.0):
    """Search stored memories via the search_memories.py skill script.

    Args:
        query: Free-text search query.
        limit: Maximum number of hits requested.
        min_score: Drop hits scoring below this threshold (0 disables).

    Returns:
        List of memory dicts (possibly empty); empty list on subprocess
        failure, timeout, or unparseable JSON output.
    """
    cmd = [
        "python3", f"{QDRANT_SKILL}/search_memories.py",
        query,
        "--limit", str(limit),
        "--json"
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except (OSError, subprocess.TimeoutExpired):
        # The original passed timeout=60 but never caught TimeoutExpired,
        # so a slow search crashed the caller; fail soft like the other
        # error paths here.
        return []
    if result.returncode != 0:
        return []
    try:
        memories = json.loads(result.stdout)
    except json.JSONDecodeError:  # was a bare `except:` — keep it narrow
        return []
    # Filter by score if specified
    if min_score > 0:
        memories = [m for m in memories if m.get("score", 0) >= min_score]
    return memories
|
||||
|
||||
def should_store_memory(text):
    """Classify whether *text* is worth persisting as a memory.

    Marker phrases are checked in priority order: explicit requests,
    identity facts, preferences, setup notes, rules/policies, temporary
    notes, important-keyword density, and lessons learned.

    Returns:
        (should_store, reason, importance) — always a 3-tuple;
        importance is None when should_store is False.
    """
    text_lower = text.lower()

    # Explicit store markers (highest priority)
    explicit_markers = ["remember this", "note this", "save this", "log this", "record this"]
    if any(marker in text_lower for marker in explicit_markers):
        return True, "explicit_store", "high"

    # Permanent markers (never expire)
    permanent_markers = [
        "my name is", "i am ", "i'm ", "call me", "i live in", "my address",
        "my phone", "my email", "my birthday", "i work at", "my job"
    ]
    if any(marker in text_lower for marker in permanent_markers):
        return True, "permanent_fact", "high"

    # Preference/decision indicators
    pref_markers = ["i prefer", "i like", "i want", "my favorite", "i need", "i use", "i choose"]
    if any(marker in text_lower for marker in pref_markers):
        return True, "preference", "high"

    # Setup/achievement markers
    setup_markers = ["setup", "installed", "configured", "working", "completed", "finished", "created"]
    if any(marker in text_lower for marker in setup_markers):
        return True, "setup_complete", "medium"

    # Rule/policy markers
    rule_markers = ["rule", "policy", "always", "never", "every", "schedule", "deadline"]
    if any(marker in text_lower for marker in rule_markers):
        return True, "rule_policy", "high"

    # Temporary markers (should expire; callers derive the 7-day TTL from
    # reason == "temporary"). BUG FIX: this branch used to return a
    # 4-tuple (..., "7d"), which crashed every 3-way unpack at the call
    # sites with a ValueError.
    temp_markers = ["for today", "for now", "temporarily", "this time only", "just for"]
    if any(marker in text_lower for marker in temp_markers):
        return True, "temporary", "low"

    # Important keywords (check density)
    important_keywords = [
        "important", "critical", "essential", "key", "main", "primary",
        "password", "api key", "token", "secret", "backup", "restore",
        "decision", "choice", "selected", "chose", "picked"
    ]
    matches = sum(1 for kw in important_keywords if kw in text_lower)
    if matches >= 2:
        return True, "keyword_match", "medium"

    # Error/lesson learned markers
    lesson_markers = ["error", "mistake", "fixed", "solved", "lesson", "learned", "solution"]
    if any(marker in text_lower for marker in lesson_markers):
        return True, "lesson", "high"

    return False, "not_important", None
|
||||
|
||||
def get_relevant_context(query, min_score=0.6, limit=5):
    """Return memories relevant to *query*, best first.

    Results are ordered by importance (high > medium > low) and, within
    equal importance, by descending similarity score.
    """
    rank = {"high": 0, "medium": 1, "low": 2}

    def _order(mem):
        # Lower tuple sorts first: (importance rank, negated score).
        return (rank.get(mem.get("importance", "medium"), 1),
                -mem.get("score", 0))

    hits = search_memories(query, limit=limit, min_score=min_score)
    return sorted(hits, key=_order)
|
||||
|
||||
def proactive_retrieval(user_message, auto_include=False):
    """Surface memories likely to be useful context for *user_message*.

    The whole message is used as the search query (noun-phrase
    extraction would be a future refinement). A memory is kept when its
    similarity score is >= 0.7 regardless of importance, or >= 0.5 when
    its importance is "high". *auto_include* is accepted for interface
    compatibility but not used here.
    """
    candidates = get_relevant_context(user_message, min_score=0.5, limit=5)
    if not candidates:
        return []

    def _worth_including(mem):
        score = mem.get("score", 0)
        importance = mem.get("importance", "medium")
        return score >= 0.7 or (score >= 0.5 and importance == "high")

    return [mem for mem in candidates if _worth_including(mem)]
|
||||
|
||||
def format_context_for_prompt(memories):
    """Render memories as a context preamble for an LLM prompt.

    Returns "" when there are no memories; otherwise a header line
    followed by one emoji-prefixed line per memory (red = high,
    yellow = medium, green = anything else).
    """
    if not memories:
        return ""

    markers = {"high": "🔴", "medium": "🟡"}
    lines = ["\n[Relevant context from previous conversations]:"]
    for mem in memories:
        marker = markers.get(mem.get("importance", "medium"), "🟢")
        lines.append(f"{marker} [{mem.get('date', 'unknown')}] {mem.get('text', '')}")
    return "\n".join(lines) + "\n"
|
||||
|
||||
def auto_tag(text, reason):
    """Generate tags for a memory from its store-reason and its content."""
    # One tag per classification reason.
    reason_tags = {
        "explicit_store": "recorded",
        "permanent_fact": "identity",
        "preference": "preference",
        "setup_complete": "setup",
        "rule_policy": "policy",
        "temporary": "temporary",
        "keyword_match": "important",
        "lesson": "lesson"
    }
    # Content tags keyed by the keywords that trigger them.
    content_tags = {
        "voice": ["voice", "tts", "stt", "whisper", "audio", "speak"],
        "tools": ["tool", "script", "command", "cli", "error"],
        "config": ["config", "setting", "setup", "install"],
        "memory": ["memory", "remember", "recall", "search"],
        "web": ["search", "web", "online", "internet"],
        "security": ["password", "token", "secret", "key", "auth"]
    }

    lowered = text.lower()
    tags = []
    if reason in reason_tags:
        tags.append(reason_tags[reason])
    tags.extend(tag for tag, keywords in content_tags.items()
                if any(kw in lowered for kw in keywords))
    return list(set(tags))  # dedupe
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: one sub-action per invocation, selected by the
    # positional "action" argument. --json switches output to machine-readable.
    parser = argparse.ArgumentParser(description="Auto-memory management")
    parser.add_argument("action", choices=[
        "store", "search", "should_store", "context",
        "proactive", "auto_process"
    ])
    parser.add_argument("text", help="Text to process")
    parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])
    parser.add_argument("--tags", help="Comma-separated tags")
    parser.add_argument("--limit", type=int, default=3)
    parser.add_argument("--min-score", type=float, default=0.6)
    parser.add_argument("--auto-include", action="store_true", help="Auto-include context in response")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    if args.action == "store":
        # Store verbatim with the user-supplied importance/tags.
        tags = [t.strip() for t in args.tags.split(",")] if args.tags else []
        if store_memory(args.text, args.importance, tags):
            result = {"stored": True, "importance": args.importance, "tags": tags}
            print(json.dumps(result) if args.json else f"✅ Stored: {args.text[:50]}...")
        else:
            result = {"stored": False, "error": "Failed to store"}
            print(json.dumps(result) if args.json else "❌ Failed to store")
            sys.exit(1)

    elif args.action == "search":
        # Plain similarity search; human output shows score + text preview.
        results = search_memories(args.text, args.limit, args.min_score)
        if args.json:
            print(json.dumps(results))
        else:
            print(f"Found {len(results)} memories:")
            for r in results:
                print(f" [{r.get('score', 0):.2f}] {r.get('text', '')[:60]}...")

    elif args.action == "should_store":
        # Heuristic classification only — nothing is stored.
        should_store, reason, importance = should_store_memory(args.text)
        result = {"should_store": should_store, "reason": reason, "importance": importance}
        print(json.dumps(result) if args.json else f"Store? {should_store} ({reason}, {importance})")

    elif args.action == "context":
        # Ranked context retrieval, formatted for prompt injection.
        context = get_relevant_context(args.text, args.min_score, args.limit)
        if args.json:
            print(json.dumps(context))
        else:
            print(format_context_for_prompt(context))

    elif args.action == "proactive":
        # Stricter relevance filtering intended for unsolicited context.
        memories = proactive_retrieval(args.text, args.auto_include)
        if args.json:
            print(json.dumps(memories))
        else:
            if memories:
                print(f"🔍 Found {len(memories)} relevant memories:")
                for m in memories:
                    score = m.get("score", 0)
                    text = m.get("text", "")[:60]
                    print(f" [{score:.2f}] {text}...")
            else:
                print("ℹ️ No highly relevant memories found")

    elif args.action == "auto_process":
        # Full pipeline: check if should store, auto-tag, store, and return context
        should_store, reason, importance = should_store_memory(args.text)

        result = {
            "should_store": should_store,
            "reason": reason,
            "stored": False
        }

        if should_store:
            # Auto-generate tags, merging in any tags given on the CLI.
            tags = auto_tag(args.text, reason)
            if args.tags:
                tags.extend([t.strip() for t in args.tags.split(",")])
            tags = list(set(tags))

            # Determine expiration for temporary memories
            expires = None
            if reason == "temporary":
                from datetime import datetime, timedelta
                expires = (datetime.now() + timedelta(days=7)).strftime("%Y-%m-%d")

            # Store it
            stored = store_memory(args.text, importance or "medium", tags,
                                  expires=expires)
            result["stored"] = stored
            result["tags"] = tags
            result["importance"] = importance

        # Also get relevant context
        context = get_relevant_context(args.text, args.min_score, args.limit)
        result["context"] = context

        # NOTE(review): without --json this prints the dict's repr().
        print(json.dumps(result) if args.json else result)
|
||||
159
skills/qdrant-memory/scripts/batch_crawl.py
Executable file
159
skills/qdrant-memory/scripts/batch_crawl.py
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Batch URL Crawler - Scrape multiple URLs to knowledge base
|
||||
Usage: batch_crawl.py urls.txt --domain "Python" --path "Docs/Tutorials"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import concurrent.futures
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from scrape_to_kb import fetch_url, extract_text, chunk_text, get_embedding, compute_checksum, store_in_kb
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "knowledge_base"
|
||||
|
||||
def load_urls(url_source):
    """Load URL entries from a .txt or .json source file.

    Text format, one entry per line:
        URL [Optional Title] #subject1,#subject2
    Blank lines and lines starting with '#' are ignored.

    Returns:
        List of (url, title_or_None, subjects_list) tuples.
    """
    if url_source.endswith('.json'):
        with open(url_source) as f:
            data = json.load(f)
        return [(item['url'], item.get('title'), item.get('subjects', []))
                for item in data]

    urls = []
    with open(url_source) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # Parse URL [title] [subjects]
            parts = line.split(' ', 1)
            url = parts[0]
            title = None
            subjects = []
            if len(parts) > 1:
                rest = parts[1]
                # Optional [Title] block
                if '[' in rest and ']' in rest:
                    title = rest[rest.find('[') + 1:rest.find(']')]
                    rest = rest[rest.find(']') + 1:]
                # Subjects like "#python,#docs": split on '#' and strip the
                # separator commas too. BUG FIX: the old code only stripped
                # whitespace, so every subject except the last kept a
                # trailing comma (e.g. "python,").
                if '#' in rest:
                    subjects = [s.strip(' ,\t') for s in rest.split('#')]
                    subjects = [s for s in subjects if s]
            urls.append((url, title, subjects))
    return urls
|
||||
|
||||
def scrape_single(url_data, domain, path, category, content_type):
    """Scrape one URL and store its text chunks in the knowledge base.

    Args:
        url_data: (url, title_override, subjects) tuple from load_urls().
        domain, path, category, content_type: KB metadata applied to
            every chunk produced from this page.

    Returns:
        A status dict: {"status": "success"|"skipped"|"failed"|"error", ...}.
        Exceptions are caught and reported in the dict, never raised, so
        one bad URL cannot kill a batch run.
    """
    from datetime import datetime  # local import: module lacks a top-level datetime import

    url, title_override, subjects = url_data

    try:
        print(f"🔍 {url}")
        html = fetch_url(url)
        if not html:
            return {"url": url, "status": "failed", "error": "fetch"}

        title, text = extract_text(html)
        if title_override:
            title = title_override

        # Pages with almost no extractable text are not worth indexing.
        if len(text) < 200:
            return {"url": url, "status": "skipped", "reason": "too_short"}

        chunks = chunk_text(text)
        checksum = compute_checksum(text)

        stored = 0
        for i, chunk in enumerate(chunks):
            chunk_metadata = {
                "domain": domain,
                "path": f"{path}/chunk-{i+1}",
                "subjects": subjects,
                "category": category,
                "content_type": content_type,
                "title": f"{title} (part {i+1}/{len(chunks)})",
                "checksum": checksum,
                "source_url": url,
                # BUG FIX: was the hard-coded literal "2026-02-05"; record
                # the actual ingestion date, matching the API scraper.
                "date_added": datetime.now().strftime("%Y-%m-%d"),
                "chunk_index": i + 1,
                "total_chunks": len(chunks),
                "text_preview": chunk[:200] + "..." if len(chunk) > 200 else chunk
            }

            if store_in_kb(chunk, chunk_metadata):
                stored += 1

        return {
            "url": url,
            "status": "success",
            "chunks": len(chunks),
            "stored": stored,
            "title": title
        }
    except Exception as e:
        return {"url": url, "status": "error", "error": str(e)}
|
||||
|
||||
def main():
    """Batch-scraping CLI: load a URL list, optionally dry-run, then
    scrape concurrently and print per-URL results plus a summary."""
    parser = argparse.ArgumentParser(description="Batch scrape URLs to knowledge base")
    parser.add_argument("urls", help="File with URLs (.txt or .json)")
    parser.add_argument("--domain", required=True, help="Knowledge domain")
    parser.add_argument("--path", required=True, help="Hierarchical path")
    parser.add_argument("--category", default="reference",
                        choices=["reference", "tutorial", "snippet", "troubleshooting", "concept"])
    parser.add_argument("--content-type", default="web_page")
    parser.add_argument("--workers", type=int, default=3, help="Concurrent workers (default: 3)")
    parser.add_argument("--dry-run", action="store_true", help="Test without storing")

    args = parser.parse_args()

    urls = load_urls(args.urls)
    print(f"📋 Loaded {len(urls)} URLs")
    print(f"🏷️ Domain: {args.domain}")
    print(f"📂 Path: {args.path}")
    print(f"⚡ Workers: {args.workers}")

    if args.dry_run:
        # Preview mode: report what would be scraped, store nothing.
        print("\n🔍 DRY RUN - No storage\n")
        for url, title, subjects in urls:
            print(f" Would scrape: {url}")
            if title:
                print(f" Title: {title}")
            if subjects:
                print(f" Subjects: {', '.join(subjects)}")
        return

    # Scrape concurrently; results are printed as each future completes,
    # so output order does not follow input order.
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
        futures = {
            executor.submit(scrape_single, url_data, args.domain, args.path,
                            args.category, args.content_type): url_data
            for url_data in urls
        }

        for future in concurrent.futures.as_completed(futures):
            # scrape_single never raises — errors come back in the dict.
            result = future.result()
            results.append(result)

            if result["status"] == "success":
                print(f" ✓ {result['title'][:50]}... ({result['stored']}/{result['chunks']} chunks)")
            elif result["status"] == "skipped":
                print(f" ⚠ Skipped: {result.get('reason')}")
            else:
                print(f" ✗ Failed: {result.get('error', 'unknown')}")

    # Summary
    success = sum(1 for r in results if r["status"] == "success")
    failed = sum(1 for r in results if r["status"] in ["failed", "error"])
    skipped = sum(1 for r in results if r["status"] == "skipped")

    print(f"\n📊 Summary:")
    print(f" ✓ Success: {success}")
    print(f" ✗ Failed: {failed}")
    print(f" ⚠ Skipped: {skipped}")

if __name__ == "__main__":
    main()
|
||||
298
skills/qdrant-memory/scripts/bulk_migrate.py
Normal file
298
skills/qdrant-memory/scripts/bulk_migrate.py
Normal file
@@ -0,0 +1,298 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bulk memory migration to Qdrant kimi_memories collection
|
||||
Uses snowflake-arctic-embed2 (1024 dimensions)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.request
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "kimi_memories"
|
||||
OLLAMA_URL = "http://10.0.0.10:11434/v1"
|
||||
|
||||
MEMORY_DIR = "/root/.openclaw/workspace/memory"
|
||||
MEMORY_MD = "/root/.openclaw/workspace/MEMORY.md"
|
||||
|
||||
def get_embedding(text):
    """Embed *text* with snowflake-arctic-embed2 via the Ollama API.

    Input is truncated to 8192 characters. Returns the embedding vector,
    or None when the request fails or the response is malformed.
    """
    payload = {
        "model": "snowflake-arctic-embed2",
        "input": text[:8192]  # Limit text length
    }
    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(request, timeout=60) as resp:
            body = json.loads(resp.read().decode())
            return body["data"][0]["embedding"]
    except Exception as exc:
        print(f"Error generating embedding: {exc}", file=sys.stderr)
        return None
|
||||
|
||||
def store_memory(text, embedding, tags=None, importance="medium", date=None,
                 source="memory_backup", confidence="high", source_type="user",
                 verified=True):
    """Upsert a single memory point into the Qdrant collection.

    Args:
        text: Memory text stored in the payload.
        embedding: Pre-computed embedding vector for *text*.
        tags, importance, date, source, confidence, source_type, verified:
            Payload metadata; *date* defaults to today (YYYY-MM-DD).

    Returns:
        True when Qdrant reports status "ok", False on any failure.
    """
    when = date if date is not None else datetime.now().strftime("%Y-%m-%d")

    point = {
        "id": str(uuid.uuid4()),
        "vector": embedding,
        "payload": {
            "text": text,
            "date": when,
            "tags": tags or [],
            "importance": importance,
            "confidence": confidence,
            "source_type": source_type,
            "verified": verified,
            "source": source,
            "created_at": datetime.now().isoformat(),
            "access_count": 0,
        },
    }

    # ?wait=true makes Qdrant confirm the write before responding.
    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=json.dumps({"points": [point]}).encode(),
        headers={"Content-Type": "application/json"}
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            reply = json.loads(response.read().decode())
            return reply.get("result", {}).get("status") == "ok"
    except Exception as exc:
        print(f"Error storing memory: {exc}", file=sys.stderr)
        return False
|
||||
|
||||
def extract_memories_from_file(filepath, importance="medium"):
    """Extract memory entries from a markdown daily-log file.

    The file is split on '## ' / '### ' headers; each section whose body
    exceeds 20 characters becomes one memory dict (text, date, tags,
    importance). The date comes from a YYYY-MM-DD match in the path,
    falling back to today.

    Returns:
        List of memory dicts (empty on read failure).
    """
    memories = []

    try:
        with open(filepath, 'r') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading {filepath}: {e}", file=sys.stderr)
        return memories

    # Extract date from filename or content
    date_match = re.search(r'(\d{4}-\d{2}-\d{2})', filepath)
    date = date_match.group(1) if date_match else datetime.now().strftime("%Y-%m-%d")

    def _flush(section, body_lines):
        # Persist a finished section as a memory when it has real content.
        # (This logic was previously copy-pasted three times.)
        section_text = '\n'.join(body_lines).strip()
        if section and len(section_text) > 20:
            memories.append({
                "text": f"{section}: {section_text}",
                "date": date,
                "tags": extract_tags(section, section_text),
                "importance": importance
            })

    current_section = None
    current_content = []

    for line in content.split('\n'):
        if line.startswith('# ') and 'Memory' in line:
            continue  # Skip document title
        elif line.startswith('## '):
            _flush(current_section, current_content)
            current_section = line[3:].strip()
            current_content = []
        elif line.startswith('### '):
            _flush(current_section, current_content)
            current_section = line[4:].strip()
            current_content = []
        elif current_section:
            current_content.append(line)

    # Save final section
    _flush(current_section, current_content)

    return memories
|
||||
|
||||
def extract_tags(section, content):
    """Derive tags from a section heading and its body text."""
    section_lower = section.lower()
    content_lower = content.lower()
    tags = []

    # Heading keywords -> tags they imply.
    section_rules = [
        (('voice', 'tts', 'stt', 'audio'), ['voice', 'audio']),
        (('memory', 'qdrant', 'remember'), ['memory', 'qdrant']),
        (('redis', 'agent', 'message', 'max'), ['redis', 'messaging', 'agent']),
        (('youtube', 'seo', 'content'), ['youtube', 'content']),
        (('search', 'searxng', 'web'), ['search', 'web']),
        (('setup', 'install', 'bootstrap'), ['setup', 'configuration']),
    ]
    for keywords, implied in section_rules:
        if any(word in section_lower for word in keywords):
            tags.extend(implied)

    # Body keywords that map directly to a same-named tag.
    for keyword in ('voice', 'memory', 'qdrant', 'redis', 'youtube'):
        if keyword in content_lower:
            tags.append(keyword)
    if 'rob' in content_lower:
        tags.append('user')

    return list(set(tags))  # Remove duplicates
|
||||
|
||||
def extract_core_memories_from_memory_md():
    """Extract high-importance memories from MEMORY.md.

    For each hard-coded core section ("## <name>"), slices the section
    out with a regex, then slices each "### <sub>" subsection out of it;
    subsections longer than 50 chars become memory dicts (text capped at
    500 chars, tagged 'core'/'longterm').

    Returns an empty list when MEMORY.md cannot be read.
    """
    memories = []

    try:
        with open(MEMORY_MD, 'r') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading MEMORY.md: {e}", file=sys.stderr)
        return memories

    # Core sections with high importance (fixed literals, so interpolating
    # them into the regex below is safe).
    sections = [
        ("Identity & Names", "high"),
        ("Core Preferences", "high"),
        ("Communication Rules", "high"),
        ("Voice Settings", "high"),
        ("Lessons Learned", "high"),
    ]

    for section_name, importance in sections:
        # Lazily capture from the section header up to the next "## " or EOF.
        pattern = f"## {section_name}.*?(?=## |$)"
        match = re.search(pattern, content, re.DOTALL)
        if match:
            section_text = match.group(0).strip()
            # Extract subsections
            subsections = re.findall(r'### (.+?)\n', section_text)
            for sub in subsections:
                # re.escape: subsection titles come from the file and may
                # contain regex metacharacters.
                sub_pattern = f"### {re.escape(sub)}.*?(?=### |## |$)"
                sub_match = re.search(sub_pattern, section_text, re.DOTALL)
                if sub_match:
                    sub_text = sub_match.group(0).strip()
                    if len(sub_text) > 50:  # skip near-empty subsections
                        memories.append({
                            "text": f"{section_name} - {sub}: {sub_text[:500]}",
                            "date": "2026-02-10",
                            "tags": extract_tags(section_name, sub_text) + ['core', 'longterm'],
                            "importance": importance
                        })

    return memories
|
||||
|
||||
def main():
    """Migrate daily-log and MEMORY.md memories into the Qdrant
    kimi_memories collection: extract, embed, store, then print a summary."""
    print("Starting bulk memory migration to kimi_memories...")
    print(f"Collection: {COLLECTION_NAME}")
    print("Model: snowflake-arctic-embed2 (1024 dims)")
    print()

    all_memories = []

    # Extract from daily logs (files named like 2026-*.md)
    for filename in sorted(os.listdir(MEMORY_DIR)):
        if filename.endswith('.md') and filename.startswith('2026'):
            filepath = os.path.join(MEMORY_DIR, filename)
            # BUG FIX: this progress line printed the literal "(unknown)"
            # instead of naming the file being processed.
            print(f"Processing {filename}...")
            memories = extract_memories_from_file(filepath, importance="medium")
            all_memories.extend(memories)
            print(f" Extracted {len(memories)} memories")

    # Extract from MEMORY.md
    print("Processing MEMORY.md...")
    core_memories = extract_core_memories_from_memory_md()
    all_memories.extend(core_memories)
    print(f" Extracted {len(core_memories)} core memories")

    print(f"\nTotal memories to store: {len(all_memories)}")
    print()

    # Store each memory; embedding failures and store failures are
    # counted separately from successes but both continue the loop.
    success_count = 0
    fail_count = 0

    for i, memory in enumerate(all_memories, 1):
        print(f"[{i}/{len(all_memories)}] Storing: {memory['text'][:60]}...")

        # Generate embedding
        embedding = get_embedding(memory['text'])
        if embedding is None:
            print(" ❌ Failed to generate embedding")
            fail_count += 1
            continue

        # Store in Qdrant
        if store_memory(
            text=memory['text'],
            embedding=embedding,
            tags=memory['tags'],
            importance=memory['importance'],
            date=memory['date'],
            source="bulk_migration",
            confidence="high",
            source_type="user",
            verified=True
        ):
            print(" ✅ Stored")
            success_count += 1
        else:
            print(" ❌ Failed to store")
            fail_count += 1

    print()
    print("=" * 50)
    print("Migration complete!")
    print(f" Success: {success_count}")
    print(f" Failed: {fail_count}")
    print(f" Total: {len(all_memories)}")
    print("=" * 50)

if __name__ == "__main__":
    main()
|
||||
204
skills/qdrant-memory/scripts/consolidate_memories.py
Executable file
204
skills/qdrant-memory/scripts/consolidate_memories.py
Executable file
@@ -0,0 +1,204 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Memory consolidation - weekly and monthly maintenance
|
||||
Usage: consolidate_memories.py weekly|monthly
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
WORKSPACE = "/root/.openclaw/workspace"
|
||||
MEMORY_DIR = f"{WORKSPACE}/memory"
|
||||
MEMORY_FILE = f"{WORKSPACE}/MEMORY.md"
|
||||
|
||||
def get_recent_daily_logs(days=7):
    """Return (datetime, Path) pairs for daily logs newer than *days*
    days, most recent first."""
    cutoff = datetime.now() - timedelta(days=days)
    recent = []

    for log_file in Path(MEMORY_DIR).glob("*.md"):
        # Filenames are YYYY-MM-DD.md; anything else is ignored.
        match = re.match(r"(\d{4}-\d{2}-\d{2})\.md", log_file.name)
        if not match:
            continue
        log_date = datetime.strptime(match.group(1), "%Y-%m-%d")
        if log_date >= cutoff:
            recent.append((log_date, log_file))

    return sorted(recent, reverse=True)
|
||||
|
||||
def extract_key_memories(content):
    """Pull lesson and preference bullet points out of a daily log.

    Scans *content* for "Lessons Learned"-style and "Preferences /
    Decisions / Rules"-style markdown sections and returns each '-' or
    '*' bullet inside them as a memory dict.

    Returns:
        List of {"type", "content", "source"} dicts (lessons first).
    """
    key_memories = []

    def _collect(pattern, mem_type):
        # Find the section, then turn each bullet line into a memory.
        # (This was duplicated near-verbatim for both section kinds.)
        match = re.search(pattern, content, re.DOTALL | re.IGNORECASE)
        if not match:
            return
        for line in match.group(1).split('\n'):
            stripped = line.strip()
            if stripped.startswith('-') or stripped.startswith('*'):
                key_memories.append({
                    "type": mem_type,
                    "content": stripped[1:].strip(),
                    "source": "daily_log"
                })

    # Look for lesson learned sections
    _collect(r"(?:##?\s*Lessons?\s*Learned|###?\s*Mistakes?|###?\s*Fixes?)(.*?)(?=##?|$)",
             "lesson")
    # Look for preferences/decisions
    _collect(r"(?:###?\s*Preferences?|###?\s*Decisions?|###?\s*Rules?)(.*?)(?=##?|$)",
             "preference")

    return key_memories
|
||||
|
||||
def update_memory_md(new_memories):
    """Append consolidated memories to MEMORY.md.

    Adds a dated "Consolidated Memories" header (at most once per day)
    and one emoji bullet per memory, then rewrites the file. A missing
    MEMORY.md is created from a small skeleton.

    Returns:
        The number of memories appended.
    """
    today = datetime.now().strftime("%Y-%m-%d")

    # Read current MEMORY.md (or start from a fresh skeleton)
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, 'r') as f:
            content = f.read()
    else:
        content = "# MEMORY.md — Long-Term Memory\n\n*Curated memories. The distilled essence, not raw logs.*\n"

    # Add today's consolidation header only if it is not already present
    consolidation_header = f"\n\n## Consolidated Memories - {today}\n\n"
    if consolidation_header.strip() not in content:
        content += consolidation_header

    for memory in new_memories:
        emoji = "📚" if memory["type"] == "lesson" else "⚙️"
        content += f"- {emoji} [{memory['type'].title()}] {memory['content']}\n"

    # Write back
    with open(MEMORY_FILE, 'w') as f:
        f.write(content)

    # BUG FIX: an unreachable `return 0` used to follow this return;
    # dead code removed.
    return len(new_memories)
|
||||
|
||||
def archive_old_logs(keep_days=30):
    """Count daily logs older than *keep_days* days.

    NOTE(review): despite the name, nothing is moved yet — the current
    behavior only tallies archivable files.
    """
    cutoff = datetime.now() - timedelta(days=keep_days)
    archived = 0

    for log_file in Path(MEMORY_DIR).glob("*.md"):
        match = re.match(r"(\d{4}-\d{2}-\d{2})\.md", log_file.name)
        if not match:
            continue
        if datetime.strptime(match.group(1), "%Y-%m-%d") < cutoff:
            # Could move to an archive folder; for now, just count.
            archived += 1

    return archived
|
||||
|
||||
def weekly_consolidation():
    """Weekly job: pull key memories from the last 7 days of daily logs
    into MEMORY.md. Returns how many key memories were found."""
    print("📅 Weekly Memory Consolidation")
    print("=" * 40)

    collected = []
    for _file_date, log_file in get_recent_daily_logs(7):
        print(f"Processing {log_file.name}...")
        with open(log_file, 'r') as handle:
            found = extract_key_memories(handle.read())
        collected.extend(found)
        print(f" Found {len(found)} key memories")

    if collected:
        count = update_memory_md(collected)
        print(f"\n✅ Consolidated {count} memories to MEMORY.md")
    else:
        print("\nℹ️ No new key memories to consolidate")

    return len(collected)
|
||||
|
||||
def monthly_cleanup():
    """Archive stale daily logs and sanity-check MEMORY.md size.

    Returns the number of log files old enough to be archived.
    """
    print("📆 Monthly Memory Cleanup")
    print("=" * 40)

    stale = archive_old_logs(30)
    print(f"Found {stale} old log files to archive")

    # Warn (but don't act) when MEMORY.md grows beyond a comfortable size.
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, 'r') as f:
            line_count = len(f.readlines())
        if line_count > 500:
            print("⚠️ MEMORY.md is getting long - consider manual review")

    print("\n✅ Monthly cleanup complete")
    return stale
|
||||
|
||||
def search_qdrant_for_context():
    """Search Qdrant for high-value memories to add to MEMORY.md.

    Shells out to the qdrant-memory search script and keeps only the
    results whose payload is flagged importance == "high".

    Returns:
        list: high-importance memory dicts, or [] on any failure
        (non-zero exit, non-JSON output, or unexpected result shape).
    """
    cmd = [
        "python3", f"{WORKSPACE}/skills/qdrant-memory/scripts/search_memories.py",
        "important preferences rules",
        "--limit", "10",
        "--json"
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        return []

    # Was a bare `except:`, which also swallowed KeyboardInterrupt and
    # SystemExit; catch only the failures this code can actually produce.
    try:
        memories = json.loads(result.stdout)
        return [m for m in memories if m.get("importance") == "high"]
    except (ValueError, AttributeError):
        # ValueError covers json.JSONDecodeError (non-JSON output);
        # AttributeError covers list items that are not dicts.
        return []
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: weekly/monthly consolidation runs, or a status report.
    arg_parser = argparse.ArgumentParser(description="Memory consolidation")
    arg_parser.add_argument("action", choices=["weekly", "monthly", "status"])
    opts = arg_parser.parse_args()

    if opts.action == "weekly":
        found = weekly_consolidation()
        sys.exit(0 if found >= 0 else 1)

    elif opts.action == "monthly":
        monthly_cleanup()
        # Monthly runs also perform the weekly consolidation pass.
        weekly_consolidation()
        sys.exit(0)

    elif opts.action == "status":
        recent = get_recent_daily_logs(30)
        print(f"📊 Memory Status")
        print(f" Daily logs (last 30 days): {len(recent)}")
        if os.path.exists(MEMORY_FILE):
            with open(MEMORY_FILE, 'r') as f:
                total_lines = len(f.readlines())
            print(f" MEMORY.md lines: {total_lines}")
        print(f" Memory directory: {MEMORY_DIR}")
|
||||
72
skills/qdrant-memory/scripts/create_daily_memory.py
Normal file
72
skills/qdrant-memory/scripts/create_daily_memory.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create today's memory file if it doesn't exist
|
||||
Usage: create_daily_memory.py [date]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
def get_cst_date():
    """Return today's date (YYYY-MM-DD) in America/Chicago local time.

    Prefers the stdlib zoneinfo database, which handles the CST/CDT
    transition correctly. Falls back to fixed-offset arithmetic if the
    timezone database is unavailable.

    Returns:
        str: the local date formatted as YYYY-MM-DD.
    """
    from datetime import timedelta

    now_utc = datetime.now(timezone.utc)
    try:
        from zoneinfo import ZoneInfo
        local = now_utc.astimezone(ZoneInfo("America/Chicago"))
    except Exception:
        # Fallback approximation. The previous implementation used
        # now.replace(hour=(now.hour + offset) % 24), which adjusted only
        # the hour and never rolled the date over — so between 00:00 and
        # ~06:00 UTC it reported the wrong day. timedelta arithmetic
        # carries the date correctly.
        import time
        offset_hours = -5 if time.localtime().tm_isdst > 0 else -6  # CDT or CST
        local = now_utc + timedelta(hours=offset_hours)

    return local.strftime('%Y-%m-%d')
|
||||
|
||||
def create_daily_memory(date_str=None, memory_dir="/root/.openclaw/workspace/memory"):
    """Create the daily memory file for *date_str* if it does not exist.

    Args:
        date_str: Date in YYYY-MM-DD form; defaults to today's CST date.
        memory_dir: Directory holding the daily logs (created if missing).
            Previously hard-coded; parameterized for reuse/testing while
            keeping the original default.

    Returns:
        str: path of the existing or newly created file, or None if the
        write failed.
    """
    if date_str is None:
        date_str = get_cst_date()

    filepath = os.path.join(memory_dir, f"{date_str}.md")

    # Ensure the log directory exists before touching the file.
    os.makedirs(memory_dir, exist_ok=True)

    # Never clobber an existing daily log.
    if os.path.exists(filepath):
        print(f"✅ Memory file already exists: {filepath}")
        return filepath

    # Fresh daily template.
    content = f"""# {date_str} — Daily Memory Log

## Session Start
- **Date:** {date_str}
- **Agent:** Kimi

## Activities

*(Log activities, decisions, and important context here)*

## Notes

---
*Stored for long-term memory retention*
"""

    try:
        with open(filepath, 'w') as f:
            f.write(content)
        print(f"✅ Created memory file: {filepath}")
        return filepath
    except OSError as e:
        # Only filesystem errors are expected here; report and signal
        # failure with None rather than raising.
        print(f"❌ Error creating memory file: {e}")
        return None
|
||||
|
||||
if __name__ == "__main__":
    # Optional positional argument: an explicit YYYY-MM-DD date.
    if len(sys.argv) > 1:
        requested_date = sys.argv[1]
    else:
        requested_date = None
    create_daily_memory(requested_date)
|
||||
159
skills/qdrant-memory/scripts/full_backup.py
Normal file
159
skills/qdrant-memory/scripts/full_backup.py
Normal file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Complete memory backup to kimi_memories
|
||||
Uses snowflake-arctic-embed2 (1024 dimensions)
|
||||
"""
|
||||
|
||||
import json
|
||||
import urllib.request
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION = "kimi_memories"
|
||||
OLLAMA = "http://10.0.0.10:11434/v1"
|
||||
|
||||
def embed(text):
    """Generate an embedding for *text* with snowflake-arctic-embed2.

    Input is truncated to 8000 characters before the request. Returns
    the embedding vector, or None if anything goes wrong.
    """
    try:
        payload = json.dumps({'model': 'snowflake-arctic-embed2', 'input': text[:8000]}).encode()
        request = urllib.request.Request(
            f"{OLLAMA}/embeddings",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(request, timeout=60) as response:
            reply = json.loads(response.read().decode())
        return reply["data"][0]["embedding"]
    except Exception as e:
        print(f" Embed error: {e}")
        return None
|
||||
|
||||
def store(text, embedding, date, tags, importance="medium"):
    """Upsert one memory point into the Qdrant collection.

    Builds a single point with a random UUID id and the memory metadata
    as payload. Returns True on success, False on any error.
    """
    new_point = {
        "id": str(uuid.uuid4()),
        "vector": embedding,
        "payload": {
            "text": text,
            "date": date,
            "tags": tags,
            "importance": importance,
            "confidence": "high",
            "source_type": "user",
            "verified": True,
            "created_at": datetime.now().isoformat(),
        },
    }
    try:
        request = urllib.request.Request(
            f"{QDRANT_URL}/collections/{COLLECTION}/points",
            data=json.dumps({"points": [new_point]}, separators=(',', ':')).encode(),
            headers={"Content-Type": "application/json"},
            method='PUT',
        )
        with urllib.request.urlopen(request, timeout=30):
            return True
    except Exception as e:
        print(f" Store error: {e}")
        return False
|
||||
|
||||
# All memories to backup. Each entry becomes one Qdrant point: `text` is
# embedded, and the remaining keys are stored verbatim as payload fields.
# Keys: text (required), date (YYYY-MM-DD), tags (list of str),
# importance ("high" | "medium" | "low").
memories = [
    # Core Identity
    {"text": "My name is Kimi. I am an AI assistant running on local Ollama. My human is named Rob. I have a direct 1:1 trusted relationship with Rob. I am helpful, resourceful, and genuine. I skip corporate speak and filler words. I have opinions and am not just a search engine with extra steps.", "date": "2026-02-04", "tags": ["identity", "personality", "core", "name"], "importance": "high"},
    {"text": "Rob's name: Rob. Location: Knoxville, Tennessee. Timezone: CST (America/Chicago). Infrastructure philosophy: Privacy first, Free > Paid, Local > Cloud, Private > Public, Accuracy matters, Performance matters.", "date": "2026-02-04", "tags": ["user", "rob", "location", "preferences", "core"], "importance": "high"},

    # Communication Rules
    {"text": "Voice in → Voice out: Reply with voice-only when voice received. Text in → Text out: Reply with text when text received. Never both. No transcripts to Telegram. Transcribe internally only.", "date": "2026-02-04", "tags": ["communication", "voice", "rules", "core"], "importance": "high"},
    {"text": "Voice settings: TTS Provider is Local Kokoro at http://10.0.0.228:8880. Voice is af_bella (American Female). Filename format is Kimi-YYYYMMDD-HHMMSS.ogg. STT is Faster-Whisper CPU base model.", "date": "2026-02-04", "tags": ["voice", "tts", "stt", "settings", "core"], "importance": "high"},

    # Memory System
    {"text": "Two memory systems: 1) 'remember this' or 'note' → File-based (daily logs + MEMORY.md) automatic. 2) 'q remember', 'q recall', 'q save', 'q update' → Qdrant kimi_memories manual only. 'q update' = bulk sync all file memories to Qdrant without duplicates.", "date": "2026-02-10", "tags": ["memory", "qdrant", "rules", "commands", "core"], "importance": "high"},
    {"text": "Qdrant memory is MANUAL ONLY. No automatic storage, no proactive retrieval, no auto-consolidation. Only when user explicitly requests with 'q' prefix. Daily file logs continue automatically.", "date": "2026-02-10", "tags": ["memory", "qdrant", "manual", "rules", "core"], "importance": "high"},

    # Agent Messaging
    {"text": "Other agent name: Max (formerly Jarvis). Max uses minimax-m2.1:cloud model. Redis agent messaging is MANUAL ONLY. No automatic heartbeat checks, no auto-notification queue. Manual only when user says 'check messages' or 'send to Max'.", "date": "2026-02-10", "tags": ["agent", "max", "redis", "messaging", "rules", "core"], "importance": "high"},

    # Tool Rules
    {"text": "CRITICAL: Read ACTIVE.md BEFORE every tool use. Mandatory. Use file_path not path for read. Use old_string and new_string not newText/oldText for edit. Check parameter names every time. Quality over speed.", "date": "2026-02-05", "tags": ["tools", "rules", "active", "syntax", "critical"], "importance": "high"},
    {"text": "If edit fails 2 times, switch to write tool. Never use path parameter. Never use newText/oldText. Always verify parameters match ACTIVE.md before executing.", "date": "2026-02-05", "tags": ["tools", "rules", "edit", "write", "recovery"], "importance": "high"},

    # Error Reporting
    {"text": "CRITICAL: When hitting a blocking error during an active task, report immediately - do not wait for user to ask. Do not say 'let me know when it's complete' if progress is blocked. Immediately report: 'Stopped - [reason]. Cannot proceed.' Applies to service outages, permission errors, resource exhaustion.", "date": "2026-02-10", "tags": ["errors", "reporting", "critical", "rules", "blocking"], "importance": "high"},

    # Research & Search
    {"text": "Always search web before installing. Research docs, best practices. Local docs exception: If docs are local (OpenClaw, ClawHub), use those first. Search-first sites: docs.openclaw.ai, clawhub.com, github.com, stackoverflow.com, wikipedia.org, archlinux.org.", "date": "2026-02-04", "tags": ["research", "search", "policy", "rules", "web"], "importance": "high"},
    {"text": "Default search engine: SearXNG local instance at http://10.0.0.8:8888. Method: curl to SearXNG. Always use SearXNG for web search. Browser tool only when gateway running and extension attached.", "date": "2026-02-04", "tags": ["search", "searxng", "web", "tools", "rules"], "importance": "high"},

    # Notifications
    {"text": "Always use Telegram text only unless requested otherwise. Only send notifications between 7am-10pm CST. All timestamps US CST. If notification needed outside hours, queue as heartbeat task to send at next allowed time.", "date": "2026-02-06", "tags": ["notifications", "telegram", "rules", "time", "cst"], "importance": "high"},

    # Skills & Paths
    {"text": "Voice skill paths: Whisper (inbound STT): /skills/local-whisper-stt/scripts/transcribe.py. TTS (outbound voice): /skills/kimi-tts-custom/scripts/voice_reply.py <chat_id> 'text'. Text reference to voice file does NOT send audio. Must use voice_reply.py or proper Telegram API.", "date": "2026-02-04", "tags": ["voice", "paths", "skills", "whisper", "tts"], "importance": "high"},

    # Infrastructure
    {"text": "Qdrant location: http://10.0.0.40:6333. Collection: kimi_memories. Vector size: 1024 (snowflake-arctic-embed2). Distance: Cosine. New collection created 2026-02-10 for manual memory backup.", "date": "2026-02-10", "tags": ["qdrant", "setup", "vector", "snowflake", "collection"], "importance": "high"},
    {"text": "Ollama main server: http://10.0.0.10:11434 (GPU-enabled). My model: ollama/kimi-k2.5:cloud. Max model: minimax-m2.1:cloud. Snowflake-arctic-embed2 pulled 2026-02-10 for embeddings.", "date": "2026-02-10", "tags": ["ollama", "setup", "models", "gpu", "embedding"], "importance": "high"},
    {"text": "Local services: Kokoro TTS at 10.0.0.228:8880. Ollama at 10.0.0.10:11434. SearXNG at 10.0.0.8:8888. Qdrant at 10.0.0.40:6333. Redis at 10.0.0.36:6379.", "date": "2026-02-04", "tags": ["infrastructure", "services", "local", "ips"], "importance": "high"},
    # NOTE(review): this entry embeds a sudo password in plaintext and it will
    # be copied into Qdrant — consider moving the credential to a secrets store.
    {"text": "SSH hosts: epyc-debian2-SSH (deb2) at n8n@10.0.0.39. Auth: SSH key ~/.ssh/id_ed25519. Sudo password: passw0rd. epyc-debian-SSH (deb) had OpenClaw removed 2026-02-07.", "date": "2026-02-04", "tags": ["ssh", "hosts", "deb2", "infrastructure"], "importance": "medium"},

    # Software Stack
    {"text": "Already installed: n8n, ollama, openclaw, openwebui, anythingllm, searxng, flowise, plex, radarr, sonarr, sabnzbd, comfyui. Do not recommend these when suggesting software.", "date": "2026-02-04", "tags": ["software", "installed", "stack", "existing"], "importance": "medium"},

    # YouTube & Content
    {"text": "YouTube SEO: Tags target ~490 characters comma-separated. Include primary keywords, secondary keywords, long-tail terms. Mix broad terms (Homelab) + specific terms (Proxmox LXC). CRITICAL: Pull latest 48 hours of search data/trends when composing SEO elements.", "date": "2026-02-06", "tags": ["youtube", "seo", "content", "rules", "tags"], "importance": "medium"},
    {"text": "Rob's personality: Comical and funny most of the time. Humor is logical/structured, not random/absurd. Has fun with the process. Applies to content creation and general approach.", "date": "2026-02-06", "tags": ["rob", "personality", "humor", "content"], "importance": "medium"},

    # Definitions & Shorthand
    {"text": "Shorthand: 'msgs' = Redis messages (agent-messages stream at 10.0.0.36:6379). 'messages' = Telegram direct chat. 'notification' = Telegram alerts/updates. 'full search' = use ALL tools available, comprehensive high-quality.", "date": "2026-02-06", "tags": ["shorthand", "terms", "messaging", "definitions"], "importance": "medium"},
    {"text": "Full search definition: When Rob says 'full search', use ALL tools available, find quality results. Combine SearXNG, KB search, web crawling, any other resources. Do not limit to one method - comprehensive, high-quality information.", "date": "2026-02-06", "tags": ["search", "full", "definition", "tools", "comprehensive"], "importance": "medium"},

    # System Rules
    {"text": "Cron rules: Use --cron not --schedule. No --enabled flag (jobs enabled by default). Scripts MUST always exit with code 0. Use output presence for significance, not exit codes. Always check openclaw cron list first.", "date": "2026-02-04", "tags": ["cron", "rules", "scheduling", "exit"], "importance": "medium"},
    {"text": "HEARTBEAT_OK: When receiving heartbeat poll and nothing needs attention, reply exactly HEARTBEAT_OK. It must be entire message, nothing else. Never append to actual response, never wrap in markdown.", "date": "2026-02-04", "tags": ["heartbeat", "rules", "response", "format"], "importance": "medium"},
    {"text": "Memory files: SOUL.md (who I am). USER.md (who I'm helping). AGENTS.md (workspace rules). ACTIVE.md (tool syntax - read BEFORE every tool use). TOOLS.md (tool patterns). SKILL.md (skill-specific). MEMORY.md (long-term).", "date": "2026-02-04", "tags": ["memory", "files", "guide", "reading", "session"], "importance": "high"},

    # Personality & Boundaries
    {"text": "How to be helpful: Actions > words - skip the fluff, just help. Have opinions - not a search engine with extra steps. Resourceful first - try to figure it out before asking. Competence earns trust - careful with external actions.", "date": "2026-02-04", "tags": ["helpful", "personality", "actions", "opinions", "competence"], "importance": "high"},
    {"text": "Boundaries: Private things stay private. Ask before sending emails/tweets/public posts. Not Rob's voice in group chats - I'm a participant, not his proxy. Careful with external actions, bold with internal ones.", "date": "2026-02-04", "tags": ["boundaries", "privacy", "external", "group", "rules"], "importance": "high"},
    {"text": "Group chat rules: Respond when directly mentioned, can add genuine value, something witty fits naturally. Stay silent when casual banter, someone already answered, response would be 'yeah' or 'nice'. Quality > quantity.", "date": "2026-02-04", "tags": ["group", "chat", "rules", "respond", "silent"], "importance": "medium"},
    {"text": "Writing policy: If I want to remember something, WRITE IT TO A FILE. Memory is limited - files survive session restarts. When someone says 'remember this' → update memory/YYYY-MM-DD.md. When I learn a lesson → update relevant file.", "date": "2026-02-04", "tags": ["writing", "memory", "files", "persistence", "rules"], "importance": "high"},

    # Setup Milestones
    {"text": "Setup milestones: 2026-02-04 Initial Bootstrap (identity, voice, skills). 2026-02-04 Qdrant Memory v1. 2026-02-05 ACTIVE.md Enforcement Rule. 2026-02-06 Agent Name Change (Jarvis→Max). 2026-02-10 Memory Manual Mode. 2026-02-10 Agent Messaging Manual Mode. 2026-02-10 Immediate Error Reporting Rule.", "date": "2026-02-10", "tags": ["milestones", "setup", "history", "dates"], "importance": "medium"},

    # Additional Info
    {"text": "Container limits: No GPUs attached to main container. All ML workloads run on CPU here. Whisper uses tiny or base models for speed. GPU is at 10.0.0.10 for Ollama.", "date": "2026-02-04", "tags": ["container", "limits", "gpu", "cpu", "whisper"], "importance": "medium"},
    {"text": "Installation policy: 1) Can it be a skill? → Create skill. 2) Does it fit TOOLS.md? → Add to TOOLS.md. 3) Neither → Suggest other options.", "date": "2026-02-04", "tags": ["installation", "policy", "skills", "tools", "decision"], "importance": "medium"},
    {"text": "Heartbeat rules: Keep HEARTBEAT.md empty or commented to skip automatic checks. Manual Redis messaging only when user requests. No automatic actions on heartbeat.", "date": "2026-02-10", "tags": ["heartbeat", "rules", "manual", "redis"], "importance": "medium"},
]
|
||||
|
||||
# Drive the backup: embed each memory, then upsert it into Qdrant,
# tallying successes and failures for the summary.
print(f"Prepared {len(memories)} memories for backup")
print("Starting storage to kimi_memories...")
print()

stored_count = 0
error_count = 0
total = len(memories)

for index, memory in enumerate(memories, 1):
    print(f"[{index}/{total}] {memory['text'][:50]}...")

    vector = embed(memory['text'])
    if not vector:
        print(f" ❌ Failed to generate embedding")
        error_count += 1
        continue

    stored_ok = store(memory['text'], vector, memory['date'], memory['tags'], memory['importance'])
    if stored_ok:
        print(f" ✅ Stored")
        stored_count += 1
    else:
        print(f" ❌ Failed to store")
        error_count += 1

print()
print("=" * 60)
print(f"BACKUP COMPLETE")
print(f" Success: {stored_count}")
print(f" Failed: {error_count}")
print(f" Total: {total}")
print("=" * 60)
|
||||
215
skills/qdrant-memory/scripts/full_backup_fixed.py
Normal file
215
skills/qdrant-memory/scripts/full_backup_fixed.py
Normal file
@@ -0,0 +1,215 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Complete memory backup to kimi_memories - FIXED VERSION
|
||||
Uses PUT method for Qdrant API
|
||||
"""
|
||||
|
||||
import json
import os
import sys
import urllib.request
import uuid
from datetime import datetime
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION = "kimi_memories"
|
||||
OLLAMA = "http://10.0.0.10:11434/v1"
|
||||
|
||||
def embed(text):
    """Generate an embedding for *text* with snowflake-arctic-embed2.

    Input is truncated to 8000 characters before the request.

    Returns:
        list: the embedding vector, or None if the request fails.
    """
    data = json.dumps({"model": "snowflake-arctic-embed2", "input": text[:8000]}).encode()
    req = urllib.request.Request(f"{OLLAMA}/embeddings", data=data, headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            return json.loads(r.read().decode())["data"][0]["embedding"]
    except Exception as e:
        # Fix: `sys` was referenced here but never imported at module
        # level, so any embed failure raised NameError instead of being
        # reported. `import sys` is now in the module imports.
        print(f" Embed error: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def store(text, embedding, date, tags, importance="medium"):
    """Upsert a single memory point into Qdrant over HTTP PUT.

    Returns True when Qdrant reports status "ok", False otherwise.
    """
    new_point = {
        "id": str(uuid.uuid4()),
        "vector": embedding,
        "payload": {
            "text": text,
            "date": date,
            "tags": tags,
            "importance": importance,
            "confidence": "high",
            "source_type": "user",
            "verified": True,
            "created_at": datetime.now().isoformat(),
        },
    }
    # Qdrant's points endpoint requires PUT (not POST); wait=true blocks
    # until the write has been applied.
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points?wait=true",
        data=json.dumps({"points": [new_point]}).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            reply = json.loads(r.read().decode())
        return reply.get("status") == "ok"
    except Exception as e:
        print(f" Store error: {e}", file=sys.stderr)
        return False
|
||||
|
||||
# All memories to backup
|
||||
memories = [
|
||||
# CORE IDENTITY
|
||||
{"text": "My name is Kimi. I am an AI assistant running on local Ollama. My human is named Rob. I have a direct 1:1 trusted relationship with Rob. I am helpful, resourceful, and genuine. I skip corporate speak and filler words. I have opinions and am not just a search engine with extra steps.", "date": "2026-02-04", "tags": ["identity", "personality", "core", "name"], "importance": "high"},
|
||||
|
||||
# ROB'S INFO
|
||||
{"text": "Rob's name: Rob. Location: Knoxville, Tennessee. Timezone: CST (America/Chicago). Infrastructure philosophy: Privacy first, Free > Paid, Local > Cloud, Private > Public, Accuracy matters, Performance matters.", "date": "2026-02-04", "tags": ["user", "rob", "location", "preferences", "core"], "importance": "high"},
|
||||
|
||||
# COMMUNICATION
|
||||
{"text": "Voice in → Voice out: Reply with voice-only when voice received. Text in → Text out: Reply with text when text received. Never both. No transcripts to Telegram. Transcribe internally only.", "date": "2026-02-04", "tags": ["communication", "voice", "rules", "core"], "importance": "high"},
|
||||
|
||||
# VOICE SETTINGS
|
||||
{"text": "Voice settings: TTS Provider is Local Kokoro at http://10.0.0.228:8880. Voice is af_bella (American Female). Filename format is Kimi-YYYYMMDD-HHMMSS.ogg. STT is Faster-Whisper CPU base model.", "date": "2026-02-04", "tags": ["voice", "tts", "stt", "settings", "core"], "importance": "high"},
|
||||
|
||||
# MEMORY SYSTEM RULES
|
||||
{"text": "Two memory systems: 1) 'remember this' or 'note' → File-based (daily logs + MEMORY.md) automatic. 2) 'q remember', 'q recall', 'q save', 'q update' → Qdrant kimi_memories manual only. 'q update' = bulk sync all file memories to Qdrant without duplicates.", "date": "2026-02-10", "tags": ["memory", "qdrant", "rules", "commands", "core"], "importance": "high"},
|
||||
|
||||
{"text": "Qdrant memory is MANUAL ONLY. No automatic storage, no proactive retrieval, no auto-consolidation. Only when user explicitly requests with 'q' prefix. Daily file logs continue automatically.", "date": "2026-02-10", "tags": ["memory", "qdrant", "manual", "rules", "core"], "importance": "high"},
|
||||
|
||||
# AGENT MESSAGING
|
||||
{"text": "Other agent name: Max (formerly Jarvis). Max uses minimax-m2.1:cloud model. Redis agent messaging is MANUAL ONLY. No automatic heartbeat checks, no auto-notification queue. Manual only when user says 'check messages' or 'send to Max'.", "date": "2026-02-10", "tags": ["agent", "max", "redis", "messaging", "rules", "core"], "importance": "high"},
|
||||
|
||||
# TOOL RULES
|
||||
{"text": "CRITICAL: Read ACTIVE.md BEFORE every tool use. Mandatory. Use file_path not path for read. Use old_string and new_string not newText/oldText for edit. Check parameter names every time. Quality over speed.", "date": "2026-02-05", "tags": ["tools", "rules", "active", "syntax", "critical"], "importance": "high"},
|
||||
|
||||
{"text": "If edit fails 2 times, switch to write tool. Never use path parameter. Never use newText/oldText. Always verify parameters match ACTIVE.md before executing.", "date": "2026-02-05", "tags": ["tools", "rules", "edit", "write", "recovery"], "importance": "high"},
|
||||
|
||||
# ERROR REPORTING
|
||||
{"text": "CRITICAL: When hitting a blocking error during an active task, report immediately - do not wait for user to ask. Do not say 'let me know when it is complete' if progress is blocked. Immediately report: 'Stopped - [reason]. Cannot proceed.' Applies to service outages, permission errors, resource exhaustion.", "date": "2026-02-10", "tags": ["errors", "reporting", "critical", "rules", "blocking"], "importance": "high"},
|
||||
|
||||
# RESEARCH
|
||||
{"text": "Always search web before installing. Research docs, best practices. Local docs exception: If docs are local (OpenClaw, ClawHub), use those first. Search-first sites: docs.openclaw.ai, clawhub.com, github.com, stackoverflow.com, wikipedia.org, archlinux.org.", "date": "2026-02-04", "tags": ["research", "search", "policy", "rules", "web"], "importance": "high"},
|
||||
|
||||
# WEB SEARCH
|
||||
{"text": "Default search engine: SearXNG local instance at http://10.0.0.8:8888. Method: curl to SearXNG. Always use SearXNG for web search. Browser tool only when gateway running and extension attached.", "date": "2026-02-04", "tags": ["search", "searxng", "web", "tools", "rules"], "importance": "high"},
|
||||
|
||||
# NOTIFICATIONS
|
||||
{"text": "Always use Telegram text only unless requested otherwise. Only send notifications between 7am-10pm CST. All timestamps US CST. If notification needed outside hours, queue as heartbeat task to send at next allowed time.", "date": "2026-02-06", "tags": ["notifications", "telegram", "rules", "time", "cst"], "importance": "high"},
|
||||
|
||||
# VOICE PATHS
|
||||
{"text": "Voice skill paths: Whisper (inbound STT): /skills/local-whisper-stt/scripts/transcribe.py. TTS (outbound voice): /skills/kimi-tts-custom/scripts/voice_reply.py <chat_id> 'text'. Text reference to voice file does NOT send audio. Must use voice_reply.py or proper Telegram API.", "date": "2026-02-04", "tags": ["voice", "paths", "skills", "whisper", "tts"], "importance": "high"},
|
||||
|
||||
# QDRANT SETUP
|
||||
{"text": "Qdrant location: http://10.0.0.40:6333. Collection: kimi_memories. Vector size: 1024 (snowflake-arctic-embed2). Distance: Cosine. New collection created 2026-02-10 for manual memory backup.", "date": "2026-02-10", "tags": ["qdrant", "setup", "vector", "snowflake", "collection"], "importance": "medium"},
|
||||
|
||||
# OLLAMA SETUP
|
||||
{"text": "Ollama main server: http://10.0.0.10:11434 (GPU-enabled). My model: ollama/kimi-k2.5:cloud. Max model: minimax-m2.1:cloud. Snowflake-arctic-embed2 pulled 2026-02-10 for embeddings.", "date": "2026-02-10", "tags": ["ollama", "setup", "models", "gpu", "embedding"], "importance": "medium"},
|
||||
|
||||
# LOCAL SERVICES
|
||||
{"text": "Local services: Kokoro TTS at 10.0.0.228:8880. Ollama at 10.0.0.10:11434. SearXNG at 10.0.0.8:8888. Qdrant at 10.0.0.40:6333. Redis at 10.0.0.36:6379.", "date": "2026-02-04", "tags": ["infrastructure", "services", "local", "ips"], "importance": "medium"},
|
||||
|
||||
# INSTALLED SOFTWARE
|
||||
{"text": "Already installed: n8n, ollama, openclaw, openwebui, anythingllm, searxng, flowise, plex, radarr, sonarr, sabnzbd, comfyui. Do not recommend these when suggesting software.", "date": "2026-02-04", "tags": ["software", "installed", "stack", "existing"], "importance": "medium"},
|
||||
|
||||
# SSH HOSTS
|
||||
{"text": "SSH hosts: epyc-debian2-SSH (deb2) at n8n@10.0.0.39. Auth: SSH key ~/.ssh/id_ed25519. Sudo password: passw0rd. epyc-debian-SSH (deb) had OpenClaw removed 2026-02-07.", "date": "2026-02-04", "tags": ["ssh", "hosts", "deb2", "infrastructure"], "importance": "medium"},
|
||||
|
||||
# YOUTUBE SEO
|
||||
{"text": "YouTube SEO: Tags target ~490 characters comma-separated. Include primary keywords, secondary keywords, long-tail terms. Mix broad terms (Homelab) + specific terms (Proxmox LXC). CRITICAL: Pull latest 48 hours of search data/trends when composing SEO elements.", "date": "2026-02-06", "tags": ["youtube", "seo", "content", "rules", "tags"], "importance": "medium"},
|
||||
|
||||
# ROB'S PERSONALITY
|
||||
{"text": "Rob's personality: Comical and funny most of the time. Humor is logical/structured, not random/absurd. Has fun with the process. Applies to content creation and general approach.", "date": "2026-02-06", "tags": ["rob", "personality", "humor", "content"], "importance": "medium"},
|
||||
|
||||
# SHORTHAND
|
||||
{"text": "Shorthand: 'msgs' = Redis messages (agent-messages stream at 10.0.0.36:6379). 'messages' = Telegram direct chat. 'notification' = Telegram alerts/updates. 'full search' = use ALL tools available, comprehensive high-quality.", "date": "2026-02-06", "tags": ["shorthand", "terms", "messaging", "definitions"], "importance": "medium"},
|
||||
|
||||
# FULL SEARCH
|
||||
{"text": "Full search definition: When Rob says 'full search', use ALL tools available, find quality results. Combine SearXNG, KB search, web crawling, any other resources. Do not limit to one method - comprehensive, high-quality information.", "date": "2026-02-06", "tags": ["search", "full", "definition", "tools", "comprehensive"], "importance": "medium"},
|
||||
|
||||
# CRON RULES
|
||||
{"text": "Cron rules: Use --cron not --schedule. No --enabled flag (jobs enabled by default). Scripts MUST always exit with code 0. Use output presence for significance, not exit codes. Always check openclaw cron list first.", "date": "2026-02-04", "tags": ["cron", "rules", "scheduling", "exit"], "importance": "medium"},
|
||||
|
||||
# HEARTBEAT RULES
|
||||
{"text": "Heartbeat: Keep HEARTBEAT.md empty or commented to skip automatic checks. Manual Redis messaging only when user requests. No automatic actions on heartbeat.", "date": "2026-02-10", "tags": ["heartbeat", "rules", "manual", "redis"], "importance": "medium"},
|
||||
|
||||
# SETUP MILESTONES
|
||||
{"text": "Setup milestones: 2026-02-04 Initial Bootstrap (identity, voice, skills). 2026-02-04 Qdrant Memory v1. 2026-02-05 ACTIVE.md Enforcement Rule. 2026-02-06 Agent Name Change (Jarvis→Max). 2026-02-10 Memory Manual Mode. 2026-02-10 Agent Messaging Manual Mode. 2026-02-10 Immediate Error Reporting Rule.", "date": "2026-02-10", "tags": ["milestones", "setup", "history", "dates"], "importance": "medium"},
|
||||
|
||||
# 3RD LXC PROJECT
|
||||
{"text": "Project: 3rd OpenClaw LXC. Clone of Max's setup. Will run local GPT. Status: Idea phase, awaiting planning/implementation. Mentioned 2026-02-06.", "date": "2026-02-06", "tags": ["project", "openclaw", "lxc", "gpt", "planned"], "importance": "low"},
|
||||
|
||||
# OLLAMA PRICING
|
||||
{"text": "Ollama pricing: Free=$0 (local only). Pro=$20/mo (multiple cloud, 3 private models, 3 collaborators). Max=$100/mo (5+ cloud, 5x usage, 5 private, 5 collaborators). Key: concurrency, cloud usage, private models, collaborators.", "date": "2026-02-06", "tags": ["ollama", "pricing", "plans", "max", "pro"], "importance": "low"},
|
||||
|
||||
# CONTAINER LIMITS
|
||||
{"text": "Container limits: No GPUs attached to main container. All ML workloads run on CPU here. Whisper uses tiny or base models for speed. GPU is at 10.0.0.10 for Ollama.", "date": "2026-02-04", "tags": ["container", "limits", "gpu", "cpu", "whisper"], "importance": "medium"},
|
||||
|
||||
# SKILLS LOCATION
|
||||
{"text": "Skills location: /root/.openclaw/workspace/skills/. Current skills: local-whisper-stt (inbound voice transcription), kimi-tts-custom (outbound voice with custom filenames), qdrant-memory (manual vector storage).", "date": "2026-02-04", "tags": ["skills", "paths", "location", "workspace"], "importance": "medium"},
|
||||
|
||||
# BOUNDARIES
|
||||
{"text": "Boundaries: Private things stay private. Ask before sending emails/tweets/public posts. Not Rob's voice in group chats - I'm a participant, not his proxy. Careful with external actions, bold with internal ones.", "date": "2026-02-04", "tags": ["boundaries", "privacy", "external", "group", "rules"], "importance": "high"},
|
||||
|
||||
# BEING HELPFUL
|
||||
{"text": "How to be helpful: Actions > words - skip the fluff, just help. Have opinions - not a search engine with extra steps. Resourceful first - try to figure it out before asking. Competence earns trust - careful with external actions.", "date": "2026-02-04", "tags": ["helpful", "personality", "actions", "opinions", "competence"], "importance": "high"},
|
||||
|
||||
# WRITING POLICY
|
||||
{"text": "Writing policy: If I want to remember something, WRITE IT TO A FILE. Memory is limited - files survive session restarts. When someone says 'remember this' → update memory/YYYY-MM-DD.md. When I learn a lesson → update relevant file.", "date": "2026-02-04", "tags": ["writing", "memory", "files", "persistence", "rules"], "importance": "high"},
|
||||
|
||||
# GROUP CHAT
|
||||
{"text": "Group chat rules: Respond when directly mentioned, can add genuine value, something witty fits naturally, correcting misinformation, summarizing when asked. Stay silent when casual banter, someone already answered, response would be 'yeah' or 'nice', conversation flows fine. Quality > quantity.", "date": "2026-02-04", "tags": ["group", "chat", "rules", "respond", "silent"], "importance": "medium"},
|
||||
|
||||
# REACTIONS
|
||||
{"text": "Reactions: Use emoji reactions naturally on platforms that support them. React to acknowledge without interrupting, appreciate without replying, simple yes/no situations. One reaction per message max.", "date": "2026-02-04", "tags": ["reactions", "emoji", "group", "acknowledge"], "importance": "low"},
|
||||
|
||||
# INSTALLATION POLICY
|
||||
{"text": "Installation policy decision tree: 1) Can it be a skill? → Create skill (cleanest, reusable). 2) Does it fit TOOLS.md? → Add to TOOLS.md (environment-specific: device names, SSH hosts, voice prefs). 3) Neither → Suggest other options.", "date": "2026-02-04", "tags": ["installation", "policy", "skills", "tools", "decision"], "importance": "medium"},
|
||||
|
||||
# WEBSITE MIRRORING
|
||||
{"text": "Website mirroring tools: wget --mirror (built-in, simple), httrack (free GUI), Cyotek WebCopy (Windows), SiteSucker (macOS), wpull (Python, JS-heavy sites), monolith (single-file). For dynamic sites: Playwright + Python script.", "date": "2026-02-10", "tags": ["website", "mirror", "tools", "wget", "httrack", "scrape"], "importance": "low"},
|
||||
|
||||
# HEARTBEAT_OK
|
||||
{"text": "HEARTBEAT_OK: When receiving heartbeat poll and nothing needs attention, reply exactly HEARTBEAT_OK. It must be entire message, nothing else. Never append to actual response, never wrap in markdown.", "date": "2026-02-04", "tags": ["heartbeat", "rules", "response", "format"], "importance": "medium"},
|
||||
|
||||
# MEMORY FILES GUIDE
|
||||
{"text": "Memory files: SOUL.md (who I am - read every session). USER.md (who I'm helping - read every session). AGENTS.md (workspace rules - read every session). ACTIVE.md (tool syntax - read BEFORE every tool use). TOOLS.md (tool patterns, SSH hosts - when errors). SKILL.md (skill-specific - before using skill). MEMORY.md (long-term - main session only).", "date": "2026-02-04", "tags": ["memory", "files", "guide", "reading", "session"], "importance": "high"},
|
||||
]
|
||||
|
||||
# --- Push the prepared memories into Qdrant and report totals ---
import sys

print(f"Prepared {len(memories)} memories for backup")
print("Starting storage to kimi_memories...")
print()

success = 0
failed = 0

for idx, entry in enumerate(memories, 1):
    print(f"[{idx}/{len(memories)}] {entry['text'][:50]}...")

    # Embed first; without a vector there is nothing to store.
    vec = embed(entry['text'])
    if not vec:
        print(" ❌ Failed to generate embedding")
        failed += 1
        continue

    if store(entry['text'], vec, entry['date'], entry['tags'], entry['importance']):
        print(" ✅ Stored")
        success += 1
    else:
        print(" ❌ Failed to store")
        failed += 1

# Summary banner.
print()
print("=" * 60)
print("BACKUP COMPLETE")
print(f" Success: {success}")
print(f" Failed: {failed}")
print(f" Total: {len(memories)}")
print("=" * 60)

if failed == 0:
    print("\n✅ All memories successfully backed up to kimi_memories!")
else:
    print(f"\n⚠️ {failed} memories failed. Check errors above.")
|
||||
135
skills/qdrant-memory/scripts/hybrid_search.py
Executable file
135
skills/qdrant-memory/scripts/hybrid_search.py
Executable file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Hybrid search: Search both file-based memory and Qdrant vectors
|
||||
Usage: hybrid_search.py "Query text" [--file-limit 3] [--vector-limit 3]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
WORKSPACE = "/root/.openclaw/workspace"
|
||||
MEMORY_DIR = f"{WORKSPACE}/memory"
|
||||
|
||||
def search_files(query, limit=3, memory_dir=None):
    """Search recent daily memory files for keyword matches.

    Scans memory/<YYYY-MM-DD>.md files from the last 30 days (reading at
    most the 7 most recent that exist) and returns lines containing any
    query keyword, each with +/-3 lines of surrounding context.

    Args:
        query: Free-text query; split on whitespace into keywords.
        limit: Maximum number of results to return.
        memory_dir: Directory holding the dated .md files. Defaults to
            the module-level MEMORY_DIR (parameterized for testability).

    Returns:
        List of result dicts sorted by descending score, each with keys
        "source", "date", "score", "text", "type".
    """
    if memory_dir is None:
        memory_dir = MEMORY_DIR

    results = []

    # Collect the dated files that actually exist in the last 30 days.
    files = []
    today = datetime.now()
    for i in range(30):
        date_str = (today - timedelta(days=i)).strftime("%Y-%m-%d")
        filepath = f"{memory_dir}/{date_str}.md"
        if os.path.exists(filepath):
            files.append((date_str, filepath))

    # Simple keyword search: any keyword present in a line counts as a hit.
    query_lower = query.lower()
    keywords = set(query_lower.split())
    if not keywords:
        return []  # Whitespace-only query: nothing can match.

    for date_str, filepath in files[:7]:  # Check last 7 days max
        try:
            with open(filepath, 'r') as f:
                content = f.read()
        except Exception as e:
            # Unreadable file: report it and move on rather than abort
            # the whole search (was silently swallowed before).
            print(f"Skipping {filepath}: {e}", file=sys.stderr)
            continue

        lines = content.split('\n')
        for i, line in enumerate(lines):
            line_lower = line.lower()
            if any(kw in line_lower for kw in keywords):
                # Context window: 3 lines before and after the hit.
                start = max(0, i - 3)
                end = min(len(lines), i + 4)
                context = '\n'.join(lines[start:end])

                # Relevance = fraction of keywords present in the line.
                score = sum(1 for kw in keywords if kw in line_lower) / len(keywords)

                results.append({
                    "source": f"file:{filepath}",
                    "date": date_str,
                    "score": score,
                    "text": context.strip(),
                    "type": "file"
                })

                if len(results) >= limit * 2:  # Over-collect, then rank
                    break

    # Rank by score and keep the best.
    results.sort(key=lambda x: x["score"], reverse=True)
    return results[:limit]
|
||||
|
||||
def search_qdrant(query, limit=3):
    """Delegate a semantic search to the search_memories helper script.

    Runs the script as a subprocess with --json output and tags each
    returned hit so callers can distinguish vector hits from file hits.
    Returns [] on any failure (missing script, bad JSON, timeout).
    """
    try:
        script_path = f"{WORKSPACE}/skills/qdrant-memory/scripts/search_memories.py"
        cmd = ["python3", script_path, query, "--limit", str(limit), "--json"]
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)

        if proc.returncode == 0:
            hits = json.loads(proc.stdout)
            for hit in hits:
                hit["type"] = "vector"
                hit["source"] = "qdrant"
            return hits
    except Exception as e:
        print(f"Qdrant search failed (falling back to files only): {e}", file=sys.stderr)

    return []
|
||||
|
||||
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Hybrid memory search")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--file-limit", type=int, default=3, help="Max file results")
    parser.add_argument("--vector-limit", type=int, default=3, help="Max vector results")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    # Status goes to stderr so --json output on stdout stays clean.
    print(f"Searching for: '{args.query}'\n", file=sys.stderr)

    # Query both backends independently, then merge.
    file_results = search_files(args.query, args.file_limit)
    vector_results = search_qdrant(args.query, args.vector_limit)
    all_results = file_results + vector_results

    if not all_results:
        print("No memories found matching your query.")
        sys.exit(0)

    if args.json:
        print(json.dumps(all_results, indent=2))
    else:
        # Human-readable report, file hits first.
        print(f"📁 File-based results ({len(file_results)}):")
        print("-" * 50)
        for hit in file_results:
            print(f"[{hit['date']}] Score: {hit['score']:.2f}")
            print(hit['text'][:300])
            if len(hit['text']) > 300:
                print("...")
            print()

        print(f"\n🔍 Vector (Qdrant) results ({len(vector_results)}):")
        print("-" * 50)
        for hit in vector_results:
            print(f"[{hit.get('date', 'unknown')}] Score: {hit.get('score', 0):.3f} [{hit.get('importance', 'medium')}]")
            body = hit.get('text', '')
            print(body[:300])
            if len(body) > 300:
                print("...")
            if hit.get('tags'):
                print(f"Tags: {', '.join(hit['tags'])}")
            print()
|
||||
113
skills/qdrant-memory/scripts/init_collection.py
Executable file
113
skills/qdrant-memory/scripts/init_collection.py
Executable file
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Initialize Qdrant collection for OpenClaw memories
|
||||
Usage: init_collection.py [--recreate]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import urllib.request
|
||||
import json
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "openclaw_memories"
|
||||
|
||||
def make_request(url, data=None, method="GET"):
    """Build a urllib Request for the given URL and HTTP method.

    When *data* is truthy it is JSON-encoded into the request body and
    the Content-Type header is set to application/json.
    """
    request = urllib.request.Request(url, method=method)
    if data:
        request.data = json.dumps(data).encode()
        request.add_header("Content-Type", "application/json")
    return request
|
||||
|
||||
def collection_exists():
    """Return True if the target collection is present in Qdrant.

    A 404 means "not there"; other HTTP errors are re-raised, and
    connection-level failures are reported and treated as absent.
    """
    try:
        request = make_request(f"{QDRANT_URL}/collections/{COLLECTION_NAME}")
        with urllib.request.urlopen(request, timeout=5):
            return True
    except urllib.error.HTTPError as err:
        if err.code == 404:
            return False
        raise
    except Exception as err:
        print(f"Error checking collection: {err}", file=sys.stderr)
        return False
|
||||
|
||||
def create_collection():
    """Create the memories collection via the Qdrant REST API (PUT).

    Returns:
        True when Qdrant acknowledges the creation ({"result": true}),
        False on any error (reported to stderr).
    """
    config = {
        "vectors": {
            "size": 768,  # nomic-embed-text outputs 768 dimensions
            "distance": "Cosine"
        }
    }

    req = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}",
        data=config,
        method="PUT"
    )

    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            # Identity check instead of `== True` (idiomatic; Qdrant
            # returns a JSON boolean here).
            return result.get("result") is True
    except Exception as e:
        print(f"Error creating collection: {e}", file=sys.stderr)
        return False
|
||||
|
||||
def delete_collection():
    """Issue a DELETE for the collection; True on success, False on error."""
    request = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}",
        method="DELETE"
    )
    try:
        with urllib.request.urlopen(request, timeout=5):
            return True
    except Exception as err:
        print(f"Error deleting collection: {err}", file=sys.stderr)
        return False
|
||||
|
||||
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Initialize Qdrant collection")
    parser.add_argument("--recreate", action="store_true", help="Delete and recreate collection")
    args = parser.parse_args()

    # Fail fast if Qdrant itself is unreachable.
    try:
        ping = make_request(f"{QDRANT_URL}/")
        with urllib.request.urlopen(ping, timeout=3):
            pass
    except Exception as e:
        print(f"❌ Cannot connect to Qdrant at {QDRANT_URL}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"✅ Connected to Qdrant at {QDRANT_URL}")

    exists = collection_exists()

    # --recreate: drop the existing collection before creating a fresh one.
    if exists and args.recreate:
        print(f"Deleting existing collection '{COLLECTION_NAME}'...")
        if not delete_collection():
            print(f"❌ Failed to delete collection", file=sys.stderr)
            sys.exit(1)
        print(f"✅ Deleted collection")
        exists = False

    if exists:
        print(f"✅ Collection '{COLLECTION_NAME}' already exists")
    else:
        print(f"Creating collection '{COLLECTION_NAME}'...")
        if not create_collection():
            print(f"❌ Failed to create collection", file=sys.stderr)
            sys.exit(1)
        print(f"✅ Created collection '{COLLECTION_NAME}'")
        print(f" Vector size: 768, Distance: Cosine")

    print("\n🎉 Qdrant memory collection ready!")
|
||||
112
skills/qdrant-memory/scripts/init_knowledge_base.py
Executable file
112
skills/qdrant-memory/scripts/init_knowledge_base.py
Executable file
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
"""
Initialize Qdrant collection for Knowledge Base
Usage: init_knowledge_base.py [--recreate]
"""

import argparse
import sys
import urllib.request
import json

QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "knowledge_base"


def make_request(url, data=None, method="GET"):
    """Build a urllib Request; JSON-encode *data* into the body when given."""
    request = urllib.request.Request(url, method=method)
    if data:
        request.data = json.dumps(data).encode()
        request.add_header("Content-Type", "application/json")
    return request


def collection_exists():
    """True if the collection exists; 404 means absent, other HTTP errors re-raise."""
    try:
        request = make_request(f"{QDRANT_URL}/collections/{COLLECTION_NAME}")
        with urllib.request.urlopen(request, timeout=5):
            return True
    except urllib.error.HTTPError as err:
        if err.code == 404:
            return False
        raise
    except Exception as err:
        print(f"Error checking collection: {err}", file=sys.stderr)
        return False


def create_collection():
    """Create the knowledge_base collection using PUT"""
    config = {
        "vectors": {
            "size": 768,  # nomic-embed-text output dimension
            "distance": "Cosine"
        }
    }
    request = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}",
        data=config,
        method="PUT"
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            return reply.get("result") == True
    except Exception as err:
        print(f"Error creating collection: {err}", file=sys.stderr)
        return False


def delete_collection():
    """Delete the collection; True on success, False on any error."""
    request = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}",
        method="DELETE"
    )
    try:
        with urllib.request.urlopen(request, timeout=5):
            return True
    except Exception as err:
        print(f"Error deleting collection: {err}", file=sys.stderr)
        return False


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Initialize Qdrant knowledge_base collection")
    parser.add_argument("--recreate", action="store_true", help="Delete and recreate collection")
    args = parser.parse_args()

    # Fail fast when Qdrant is unreachable.
    try:
        with urllib.request.urlopen(make_request(f"{QDRANT_URL}/"), timeout=3):
            pass
    except Exception as e:
        print(f"❌ Cannot connect to Qdrant at {QDRANT_URL}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"✅ Connected to Qdrant at {QDRANT_URL}")

    exists = collection_exists()

    if exists and args.recreate:
        print(f"Deleting existing collection '{COLLECTION_NAME}'...")
        if delete_collection():
            print(f"✅ Deleted collection")
            exists = False
        else:
            print(f"❌ Failed to delete collection", file=sys.stderr)
            sys.exit(1)

    if not exists:
        print(f"Creating collection '{COLLECTION_NAME}'...")
        if create_collection():
            print(f"✅ Created collection '{COLLECTION_NAME}'")
            print(f" Vector size: 768, Distance: Cosine")
        else:
            print(f"❌ Failed to create collection", file=sys.stderr)
            sys.exit(1)
    else:
        print(f"✅ Collection '{COLLECTION_NAME}' already exists")

    print("\n🎉 Knowledge base collection ready!")
|
||||
113
skills/qdrant-memory/scripts/init_projects_collection.py
Executable file
113
skills/qdrant-memory/scripts/init_projects_collection.py
Executable file
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
"""
Initialize Qdrant collection for Projects
Usage: init_projects_collection.py [--recreate]
"""

import argparse
import sys
import urllib.request
import json

QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "projects"


def make_request(url, data=None, method="GET"):
    """Build a urllib Request; JSON-encode *data* into the body when given."""
    request = urllib.request.Request(url, method=method)
    if data:
        request.data = json.dumps(data).encode()
        request.add_header("Content-Type", "application/json")
    return request


def collection_exists():
    """True if the collection exists; 404 means absent, other HTTP errors re-raise."""
    try:
        request = make_request(f"{QDRANT_URL}/collections/{COLLECTION_NAME}")
        with urllib.request.urlopen(request, timeout=5):
            return True
    except urllib.error.HTTPError as err:
        if err.code == 404:
            return False
        raise
    except Exception as err:
        print(f"Error checking collection: {err}", file=sys.stderr)
        return False


def create_collection():
    """Create the projects collection using PUT"""
    config = {
        "vectors": {
            "size": 768,  # nomic-embed-text outputs 768 dimensions
            "distance": "Cosine"
        }
    }
    request = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}",
        data=config,
        method="PUT"
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            return reply.get("result") == True
    except Exception as err:
        print(f"Error creating collection: {err}", file=sys.stderr)
        return False


def delete_collection():
    """Delete the collection; True on success, False on any error."""
    request = make_request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}",
        method="DELETE"
    )
    try:
        with urllib.request.urlopen(request, timeout=5):
            return True
    except Exception as err:
        print(f"Error deleting collection: {err}", file=sys.stderr)
        return False


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Initialize Qdrant projects collection")
    parser.add_argument("--recreate", action="store_true", help="Delete and recreate collection")
    args = parser.parse_args()

    # Check if Qdrant is reachable
    try:
        with urllib.request.urlopen(make_request(f"{QDRANT_URL}/"), timeout=3):
            pass
    except Exception as e:
        print(f"❌ Cannot connect to Qdrant at {QDRANT_URL}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"✅ Connected to Qdrant at {QDRANT_URL}")

    exists = collection_exists()

    if exists and args.recreate:
        print(f"Deleting existing collection '{COLLECTION_NAME}'...")
        if delete_collection():
            print(f"✅ Deleted collection")
            exists = False
        else:
            print(f"❌ Failed to delete collection", file=sys.stderr)
            sys.exit(1)

    if not exists:
        print(f"Creating collection '{COLLECTION_NAME}'...")
        if create_collection():
            print(f"✅ Created collection '{COLLECTION_NAME}'")
            print(f" Vector size: 768, Distance: Cosine")
        else:
            print(f"❌ Failed to create collection", file=sys.stderr)
            sys.exit(1)
    else:
        print(f"✅ Collection '{COLLECTION_NAME}' already exists")

    print("\n🎉 Qdrant projects collection ready!")
|
||||
190
skills/qdrant-memory/scripts/js_scraper.py
Executable file
190
skills/qdrant-memory/scripts/js_scraper.py
Executable file
@@ -0,0 +1,190 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
JavaScript Scraper - Headless browser for JS-heavy sites
|
||||
Uses Playwright to render dynamic content before scraping
|
||||
Usage: js_scraper.py <url> --domain "React" --path "Docs/Hooks" --wait-for "#content"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from scrape_to_kb import chunk_text, get_embedding, compute_checksum, store_in_kb
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "knowledge_base"
|
||||
|
||||
def scrape_js_site(url, wait_for=None, wait_time=2000, scroll=False, viewport=None):
    """Scrape a JavaScript-rendered page with headless Chromium.

    Args:
        url: Page to load.
        wait_for: Optional CSS selector to wait for before extracting.
        wait_time: Extra settle time in ms after load / selector wait.
        scroll: Repeatedly scroll to the bottom (infinite-scroll pages).
        viewport: Optional (width, height) tuple.

    Returns:
        Dict with title, text, meta_description, json_ld and the final
        (post-redirect) url.

    Raises:
        Whatever Playwright raises on navigation/selector timeouts. The
        browser is closed on every path (try/finally — previously the
        close call was duplicated in the success and error branches).
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context_options = {}
            if viewport:
                context_options["viewport"] = {"width": viewport[0], "height": viewport[1]}

            context = browser.new_context(**context_options)
            page = context.new_page()

            # Present a desktop-browser UA; some sites gate content on it.
            page.set_extra_http_headers({
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            })

            print(f"🌐 Loading {url}...")
            page.goto(url, wait_until="networkidle", timeout=30000)

            # Wait for specific element if requested
            if wait_for:
                print(f"⏳ Waiting for {wait_for}...")
                page.wait_for_selector(wait_for, timeout=10000)

            # Additional wait for any animations/final renders
            page.wait_for_timeout(wait_time)

            # Scroll to bottom if requested (for infinite scroll pages)
            if scroll:
                print("📜 Scrolling...")
                prev_height = 0
                while True:
                    page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                    page.wait_for_timeout(500)
                    new_height = page.evaluate("document.body.scrollHeight")
                    if new_height == prev_height:
                        break
                    prev_height = new_height

            title = page.title()

            # Extract clean text: strip chrome, prefer the main content node.
            text = page.evaluate("""() => {
                // Remove script/style/nav/header/footer
                const scripts = document.querySelectorAll('script, style, nav, header, footer, aside, .advertisement, .ads');
                scripts.forEach(el => el.remove());

                // Get main content if available, else body
                const main = document.querySelector('main, article, [role="main"], .content, .post-content, .entry-content');
                const content = main || document.body;

                return content.innerText;
            }""")

            # Get any JSON-LD structured data
            json_ld = page.evaluate("""() => {
                const scripts = document.querySelectorAll('script[type="application/ld+json"]');
                const data = [];
                scripts.forEach(s => {
                    try {
                        data.push(JSON.parse(s.textContent));
                    } catch(e) {}
                });
                return data;
            }""")

            # Get meta description
            meta_desc = page.evaluate("""() => {
                const meta = document.querySelector('meta[name=\"description\"], meta[property=\"og:description\"]');
                return meta ? meta.content : '';
            }""")

            # Capture before the browser closes: final URL after redirects.
            final_url = page.url

            return {
                "title": title,
                "text": text,
                "meta_description": meta_desc,
                "json_ld": json_ld,
                "url": final_url
            }
        finally:
            browser.close()
|
||||
|
||||
def main():
    """CLI entry point: scrape one JS-rendered page and store it in the KB.

    Chunks the extracted text, then writes each chunk with hierarchical
    metadata into the knowledge_base collection. Exits non-zero on
    scrape failure or when the page yields under 200 chars of content.
    """
    parser = argparse.ArgumentParser(description="Scrape JavaScript-heavy sites")
    parser.add_argument("url", help="URL to scrape")
    parser.add_argument("--domain", required=True, help="Knowledge domain")
    parser.add_argument("--path", required=True, help="Hierarchical path")
    parser.add_argument("--wait-for", help="CSS selector to wait for")
    parser.add_argument("--wait-time", type=int, default=2000, help="Wait time in ms after load")
    parser.add_argument("--scroll", action="store_true", help="Scroll to bottom (for infinite scroll)")
    parser.add_argument("--viewport", help="Viewport size (e.g., 1920x1080)")
    parser.add_argument("--category", default="reference")
    parser.add_argument("--content-type", default="web_page")
    parser.add_argument("--subjects", help="Comma-separated subjects")
    parser.add_argument("--title", help="Override title")

    args = parser.parse_args()

    viewport = None
    if args.viewport:
        w, h = args.viewport.split('x')
        viewport = (int(w), int(h))

    try:
        result = scrape_js_site(
            args.url,
            wait_for=args.wait_for,
            wait_time=args.wait_time,
            scroll=args.scroll,
            viewport=viewport
        )
    except Exception as e:
        print(f"❌ Error: {e}", file=sys.stderr)
        sys.exit(1)

    title = args.title or result["title"]
    text = result["text"]

    print(f"📄 Title: {title}")
    print(f"📝 Content: {len(text)} chars")

    if len(text) < 200:
        print("❌ Content too short", file=sys.stderr)
        sys.exit(1)

    # Prepend the meta description so it is searchable with the body.
    if result["meta_description"]:
        text = f"Description: {result['meta_description']}\n\n{text}"

    chunks = chunk_text(text)
    print(f"🧩 Chunks: {len(chunks)}")

    subjects = [s.strip() for s in args.subjects.split(",")] if args.subjects else []
    checksum = compute_checksum(text)

    # BUGFIX: date_added was hard-coded to "2026-02-05"; stamp the actual
    # ingestion date instead.
    from datetime import datetime
    date_added = datetime.now().strftime("%Y-%m-%d")

    print("💾 Storing...")
    stored = 0
    for i, chunk in enumerate(chunks):
        chunk_metadata = {
            "domain": args.domain,
            "path": f"{args.path}/chunk-{i+1}",
            "subjects": subjects,
            "category": args.category,
            "content_type": args.content_type,
            "title": f"{title} (part {i+1}/{len(chunks)})",
            "checksum": checksum,
            "source_url": result["url"],
            "date_added": date_added,
            "chunk_index": i + 1,
            "total_chunks": len(chunks),
            "text_preview": chunk[:200] + "..." if len(chunk) > 200 else chunk,
            "scraper_type": "playwright_headless",
            "rendered": True
        }

        if store_in_kb(chunk, chunk_metadata):
            stored += 1
            print(f" ✓ Chunk {i+1}")

    print(f"\n🎉 Stored {stored}/{len(chunks)} chunks")


if __name__ == "__main__":
    main()
|
||||
183
skills/qdrant-memory/scripts/kb_review.py
Executable file
183
skills/qdrant-memory/scripts/kb_review.py
Executable file
@@ -0,0 +1,183 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Review knowledge base for outdated entries
|
||||
Usage: kb_review.py [--days 180] [--domains "Domain1,Domain2"] [--dry-run]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
KB_COLLECTION = "knowledge_base"
|
||||
|
||||
# Domains where freshness matters (tech changes fast)
|
||||
FAST_MOVING_DOMAINS = ["AI/ML", "Python", "JavaScript", "Docker", "OpenClaw", "DevOps"]
|
||||
|
||||
def make_request(url, data=None, method="GET"):
    """Construct a urllib Request, JSON-encoding *data* as the body if given."""
    req = urllib.request.Request(url, method=method)
    if data:
        req.add_header("Content-Type", "application/json")
        req.data = json.dumps(data).encode()
    return req
|
||||
|
||||
def get_all_entries(limit=1000):
    """Scroll the knowledge_base collection and return its points.

    Payloads are included; returns [] on any request failure.
    """
    endpoint = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/scroll"
    body = {"limit": limit, "with_payload": True}
    request = make_request(endpoint, body, "POST")
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            reply = json.loads(response.read().decode())
            return reply.get("result", {}).get("points", [])
    except Exception as err:
        print(f"❌ Error fetching entries: {err}", file=sys.stderr)
        return []
|
||||
|
||||
def parse_date(date_str):
    """Parse a payload date string into a naive datetime.

    Accepts "YYYY-MM-DD" and "YYYY-MM-DDTHH:MM:SS[.ffffff]"; fractional
    seconds are truncated. Returns None for empty, unparseable, or
    non-string input.
    """
    if not date_str:
        return None

    # Fractional seconds are stripped up front by split('.'), so only two
    # formats are reachable (the original also listed "...%f" — dead code)
    # and the %f branch has been removed.
    formats = [
        "%Y-%m-%d",
        "%Y-%m-%dT%H:%M:%S",
    ]

    for fmt in formats:
        try:
            return datetime.strptime(date_str.split('.')[0], fmt)
        except (ValueError, TypeError, AttributeError):
            # ValueError: wrong format; TypeError/AttributeError: not a
            # string. Narrowed from a bare `except:` which also hid
            # KeyboardInterrupt/SystemExit.
            continue

    return None
|
||||
|
||||
def is_outdated(entry, threshold_days, fast_moving_multiplier=0.5):
    """Decide whether a KB entry is older than its freshness threshold.

    Entries in FAST_MOVING_DOMAINS get a shortened threshold of
    threshold_days * fast_moving_multiplier. Returns (is_old, info) where
    info is None when the entry carries no parseable date.
    """
    payload = entry.get("payload", {})

    # Prefer the scrape date; fall back to when the entry was added.
    date_str = payload.get("date_scraped") or payload.get("date_added")
    entry_date = parse_date(date_str)
    if not entry_date:
        return False, None  # Undated entries are never flagged.

    domain = payload.get("domain", "")
    if domain in FAST_MOVING_DOMAINS:
        effective_threshold = int(threshold_days * fast_moving_multiplier)
    else:
        effective_threshold = threshold_days

    age = datetime.now() - entry_date
    info = {
        "age_days": age.days,
        "threshold": effective_threshold,
        "domain": domain,
        "date": date_str
    }
    return age.days > effective_threshold, info
|
||||
|
||||
def delete_entry(entry_id):
    """Remove a single point from the knowledge base; True on success."""
    endpoint = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/delete"
    request = make_request(endpoint, {"points": [entry_id]}, "POST")
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            return reply.get("status") == "ok"
    except Exception as err:
        print(f"❌ Error deleting: {err}", file=sys.stderr)
        return False
|
||||
|
||||
def main():
    """CLI entry point: scan the knowledge base for outdated entries.

    Fetches all entries, optionally filters by domain, flags entries
    older than the threshold, and (with --delete) removes them.
    """
    parser = argparse.ArgumentParser(description="Review knowledge base for outdated entries")
    parser.add_argument("--days", type=int, default=180, help="Age threshold in days")
    parser.add_argument("--domains", help="Comma-separated domains to check (default: all)")
    parser.add_argument("--fast-moving-only", action="store_true", help="Only check fast-moving domains")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be deleted")
    parser.add_argument("--delete", action="store_true", help="Actually delete outdated entries")

    args = parser.parse_args()

    print(f"🔍 Fetching knowledge base entries...")
    entries = get_all_entries()

    if not entries:
        print("❌ No entries found")
        return

    print(f" Total entries: {len(entries)}")

    # Filter by domain if specified
    if args.domains:
        target_domains = [d.strip() for d in args.domains.split(",")]
        entries = [e for e in entries if e.get("payload", {}).get("domain") in target_domains]
        print(f" Filtered to domains: {target_domains}")
    elif args.fast_moving_only:
        entries = [e for e in entries if e.get("payload", {}).get("domain") in FAST_MOVING_DOMAINS]
        print(f" Filtered to fast-moving domains: {FAST_MOVING_DOMAINS}")

    # Check for outdated entries
    outdated = []
    for entry in entries:
        is_old, info = is_outdated(entry, args.days)
        if is_old:
            outdated.append({
                "entry": entry,
                "info": info
            })

    if not outdated:
        print(f"\n✅ No outdated entries found!")
        return

    print(f"\n⚠️ Found {len(outdated)} outdated entries:")
    # NOTE(review): assumes is_outdated's default fast_moving_multiplier of 0.5.
    print(f" (Threshold: {args.days} days, fast-moving: {int(args.days * 0.5)} days)")

    for item in outdated:
        entry = item["entry"]
        info = item["info"]
        payload = entry.get("payload", {})

        print(f"\n 📄 {payload.get('title', 'Untitled')}")
        print(f" Domain: {info['domain']} | Age: {info['age_days']} days | Threshold: {info['threshold']} days")
        print(f" Date: {info['date']}")
        print(f" Path: {payload.get('path', 'N/A')}")

        # Deletion requires an explicit --delete AND no --dry-run.
        if args.delete and not args.dry_run:
            if delete_entry(entry.get("id")):
                print(f" ✅ Deleted")
            else:
                print(f" ❌ Failed to delete")
        elif args.dry_run:
            print(f" [Would delete in non-dry-run mode]")

    # Summary
    print(f"\n📊 Summary:")
    print(f" Total checked: {len(entries)}")
    print(f" Outdated: {len(outdated)}")

    if args.dry_run:
        print(f"\n💡 Use --delete to remove these entries")
    elif not args.delete:
        print(f"\n💡 Use --dry-run to preview, --delete to remove")


if __name__ == "__main__":
    main()
|
||||
136
skills/qdrant-memory/scripts/kb_search.py
Executable file
136
skills/qdrant-memory/scripts/kb_search.py
Executable file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Search kimi_kb (Knowledge Base) - Manual only
|
||||
|
||||
Usage:
|
||||
python3 kb_search.py "query"
|
||||
python3 kb_search.py "docker volumes" --domain "Docker"
|
||||
python3 kb_search.py "query" --include-urls
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION = "kimi_kb"
|
||||
OLLAMA_URL = "http://10.0.0.10:11434/v1"
|
||||
|
||||
def get_embedding(text):
    """Generate an embedding via snowflake-arctic-embed2; None on failure."""
    payload = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],  # model input cap
    }).encode()

    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            reply = json.loads(response.read().decode())
            return reply["data"][0]["embedding"]
    except Exception as e:
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def search_kb(query, domain=None, limit=5):
    """Vector-search the knowledge base, optionally filtered by domain.

    Returns a list of result points, or None when embedding/search fails.
    """
    vector = get_embedding(query)
    if vector is None:
        return None

    body = {
        "vector": vector,
        "limit": limit,
        "with_payload": True,
        "with_vector": False,
    }

    # Restrict matches to one domain via a Qdrant payload filter.
    if domain:
        body["filter"] = {
            "must": [
                {"key": "domain", "match": {"value": domain}}
            ]
        }

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            reply = json.loads(response.read().decode())
            return reply.get("result", [])
    except Exception as e:
        print(f"Error searching KB: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def format_result(point, idx):
    """Render one search hit as a human-readable text block."""
    payload = point.get("payload", {})
    score = point.get("score", 0)

    parts = [f"\n[{idx}] {payload.get('title', 'Untitled')} (score: {score:.3f})\n"]
    parts.append(f" Domain: {payload.get('domain', 'unknown')}\n")

    # URL and source lines are optional.
    if payload.get('url'):
        parts.append(f" URL: {payload['url']}\n")
    if payload.get('source'):
        parts.append(f" Source: {payload['source']}\n")

    # Truncate the stored text to a 300-char snippet with an ellipsis.
    full_text = payload.get('text', '')
    snippet = full_text[:300]
    if len(full_text) > 300:
        snippet += "..."
    parts.append(f" Content: {snippet}\n")

    return "".join(parts)
|
||||
|
||||
def main():
    """CLI entry point: search kimi_kb and print results (text or JSON)."""
    import argparse

    parser = argparse.ArgumentParser(description="Search kimi_kb")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--domain", default=None, help="Filter by domain")
    parser.add_argument("--limit", type=int, default=5, help="Number of results")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    print(f"🔍 Searching kimi_kb: {args.query}")
    if args.domain:
        print(f" Filter: domain={args.domain}")
    print()

    results = search_kb(args.query, args.domain, args.limit)

    # None means the search itself failed (embedding or HTTP error);
    # an empty list means the search ran but matched nothing.
    if results is None:
        print("❌ Search failed", file=sys.stderr)
        sys.exit(1)

    if not results:
        print("No results found in kimi_kb")
        return

    if args.json:
        print(json.dumps(results, indent=2))
    else:
        print(f"Found {len(results)} results:\n")
        for i, point in enumerate(results, 1):
            print(format_result(point, i))


if __name__ == "__main__":
    main()
|
||||
124
skills/qdrant-memory/scripts/kb_store.py
Executable file
124
skills/qdrant-memory/scripts/kb_store.py
Executable file
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Store content to kimi_kb (Knowledge Base) - Manual only
|
||||
|
||||
Usage:
|
||||
python3 kb_store.py "Content text" --title "Title" --domain "Category" --tags "tag1,tag2"
|
||||
python3 kb_store.py "Content" --title "X" --url "https://example.com" --source "docs.site"
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION = "kimi_kb"
|
||||
OLLAMA_URL = "http://10.0.0.10:11434/v1"
|
||||
|
||||
def get_embedding(text):
    """Embed *text* (truncated to 8192 chars) with snowflake-arctic-embed2."""
    body = {"model": "snowflake-arctic-embed2", "input": text[:8192]}

    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            reply = json.loads(response.read().decode())
            # OpenAI-compatible response shape: data[0].embedding
            return reply["data"][0]["embedding"]
    except Exception as e:
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def store_to_kb(text, title=None, url=None, source=None, domain=None,
                tags=None, content_type="document"):
    """Embed *text* and upsert it into the kimi_kb collection.

    Returns True when Qdrant confirms the upsert, False otherwise
    (including when the embedding could not be generated).
    """
    vector = get_embedding(text)
    if vector is None:
        return False

    record = {
        "text": text,
        "title": title or "Untitled",
        "url": url or "",
        "source": source or "manual",
        "domain": domain or "general",
        "tags": tags or [],
        "content_type": content_type,
        "date": datetime.now().strftime("%Y-%m-%d"),
        "created_at": datetime.now().isoformat(),
        "access_count": 0,
    }

    body = {
        "points": [{
            "id": str(uuid.uuid4()),
            "vector": vector,
            "payload": record,
        }]
    }

    # ?wait=true makes Qdrant block until the point is persisted.
    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points?wait=true",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT",
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            reply = json.loads(response.read().decode())
            return reply.get("status") == "ok"
    except Exception as e:
        print(f"Error storing to KB: {e}", file=sys.stderr)
        return False
|
||||
|
||||
def main():
    """CLI entry point: store one piece of content into kimi_kb."""
    import argparse

    parser = argparse.ArgumentParser(description="Store content to kimi_kb")
    parser.add_argument("content", help="Content to store")
    parser.add_argument("--title", default=None, help="Title of the content")
    parser.add_argument("--url", default=None, help="Source URL if from web")
    parser.add_argument("--source", default=None, help="Source name (e.g., 'docs.openclaw.ai')")
    parser.add_argument("--domain", default="general", help="Domain/category (e.g., 'OpenClaw', 'Docker')")
    parser.add_argument("--tags", default=None, help="Comma-separated tags")
    parser.add_argument("--type", default="document", choices=["document", "web", "code", "note"],
                        help="Content type")

    args = parser.parse_args()

    # Split "a, b, c" into a clean list; empty when --tags is omitted.
    tags = [t.strip() for t in args.tags.split(",")] if args.tags else []

    print(f"Storing to kimi_kb: {args.title or 'Untitled'}...")

    if store_to_kb(
        text=args.content,
        title=args.title,
        url=args.url,
        source=args.source,
        domain=args.domain,
        tags=tags,
        content_type=args.type
    ):
        print(f"✅ Stored to kimi_kb ({args.domain})")
    else:
        # Non-zero exit so shell callers can detect the failure.
        print("❌ Failed to store")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
77
skills/qdrant-memory/scripts/log_activity.py
Normal file
77
skills/qdrant-memory/scripts/log_activity.py
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convenience wrapper for activity logging
|
||||
Add to your scripts: from log_activity import log_done, check_other_agent
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from activity_log import log_activity, check_for_duplicates, get_recent_activities
|
||||
|
||||
AGENT_NAME = "Kimi" # Change to "Max" on that instance
|
||||
|
||||
def log_done(action_type: str, description: str, files=None, status="completed"):
    """Quick log of completed work to the shared activity log.

    Example:
        log_done("cron_created", "Set up daily OpenClaw repo monitoring",
                 files=["/path/to/script.py"])
    """
    entry_id = log_activity(
        agent=AGENT_NAME,
        action_type=action_type,
        description=description,
        affected_files=files or [],
        status=status,
    )
    # Show only a short id prefix to keep the console line compact.
    print(f"[ActivityLog] Logged: {action_type} → {entry_id[:8]}...")
    return entry_id
|
||||
|
||||
def check_other_agent(action_type: str, keywords: str, hours: int = 6) -> bool:
    """Return True when the peer agent already logged matching work recently.

    Example:
        if check_other_agent("cron_created", "openclaw repo monitoring"):
            print("Max already set this up!")
            return
    """
    peer = "Max" if AGENT_NAME == "Kimi" else "Kimi"

    recent = get_recent_activities(agent=peer, action_type=action_type, hours=hours)

    # Every keyword must appear in the description to count as a duplicate.
    wanted = keywords.lower().split()
    for activity in recent:
        description = activity.get("description", "").lower()
        if all(word in description for word in wanted):
            print(f"[ActivityLog] ⚠️ {peer} already did this!")
            print(f" When: {activity['timestamp'][:19]}")
            print(f" What: {activity['description']}")
            return True

    return False
|
||||
|
||||
def show_recent_collaboration(hours: int = 24):
    """Print a combined timeline of both agents' recent work."""
    entries = get_recent_activities(hours=hours, limit=50)

    print(f"\n[ActivityLog] Both agents' work (last {hours}h):\n")
    for entry in entries:
        who = entry['agent']
        # Distinct icons make it easy to scan who did what.
        marker = "🤖" if who == "Max" else "🎙️"
        print(f"{marker} [{entry['timestamp'][11:19]}] {who}: {entry['action_type']}")
        print(f" {entry['description']}")
|
||||
|
||||
if __name__ == "__main__":
    # Quick test
    # Running this module directly prints the available helpers and a
    # live sample of recent activity (hits the activity log backend).
    print(f"Agent: {AGENT_NAME}")
    print("Functions available:")
    print(" log_done(action_type, description, files=[], status='completed')")
    print(" check_other_agent(action_type, keywords, hours=6)")
    print(" show_recent_collaboration(hours=24)")
    print()
    print("Recent activity:")
    show_recent_collaboration(hours=24)
|
||||
212
skills/qdrant-memory/scripts/memory_decay.py
Executable file
212
skills/qdrant-memory/scripts/memory_decay.py
Executable file
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Memory decay system - handle expiration and cleanup
|
||||
Usage: memory_decay.py check|cleanup
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "openclaw_memories"
|
||||
|
||||
def get_expired_memories():
    """Scroll Qdrant for memories whose expires_at is on or before today."""
    today = datetime.now().strftime("%Y-%m-%d")

    # Range filter: expires_at <= today (string comparison on ISO dates).
    query = {
        "filter": {
            "must": [
                {"key": "expires_at", "range": {"lte": today}}
            ]
        },
        "limit": 100,
        "with_payload": True,
    }

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
        data=json.dumps(query).encode(),
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            return reply.get("result", {}).get("points", [])
    except Exception as e:
        print(f"Error finding expired memories: {e}", file=sys.stderr)
        return []
|
||||
|
||||
def get_stale_memories(days=90):
    """Find low-importance memories not accessed within *days* days."""
    cutoff = (datetime.now() - timedelta(days=days)).isoformat()

    # Both conditions must hold: old last_accessed AND importance == "low".
    query = {
        "filter": {
            "must": [
                {"key": "last_accessed", "range": {"lte": cutoff}},
                {"key": "importance", "match": {"value": "low"}},
            ]
        },
        "limit": 100,
        "with_payload": True,
    }

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/scroll",
        data=json.dumps(query).encode(),
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            return reply.get("result", {}).get("points", [])
    except Exception as e:
        print(f"Error finding stale memories: {e}", file=sys.stderr)
        return []
|
||||
|
||||
def delete_memory(point_id):
    """Delete a memory point from Qdrant; True on success."""
    # ?wait=true blocks until the deletion is applied.
    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/delete?wait=true",
        data=json.dumps({"points": [point_id]}).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            return reply.get("status") == "ok"
    except Exception as e:
        print(f"Error deleting memory {point_id}: {e}", file=sys.stderr)
        return False
|
||||
|
||||
def update_access_count(point_id):
    """Increment access count for a memory"""
    # This would require reading then writing the point
    # Simplified: just update last_accessed
    # NOTE(review): currently a no-op stub -- nothing is read or written.
    # Callers relying on access counts will see stale values until this
    # is implemented.
    pass
|
||||
|
||||
def check_decay():
    """Check what memories are expired or stale.

    Prints a human-readable report and returns (expired, stale) as two
    lists of Qdrant points.
    """
    print("🔍 Memory Decay Check")
    print("=" * 40)

    expired = get_expired_memories()
    print(f"\n📅 Expired memories: {len(expired)}")
    for m in expired:
        # Show only a 60-char snippet per memory to keep the report compact.
        text = m["payload"].get("text", "")[:60]
        expires = m["payload"].get("expires_at", "unknown")
        print(f" [{expires}] {text}...")

    # Stale = low-importance and not accessed in 90+ days (see get_stale_memories).
    stale = get_stale_memories(90)
    print(f"\n🕐 Stale memories (90+ days): {len(stale)}")
    for m in stale:
        text = m["payload"].get("text", "")[:60]
        last_access = m["payload"].get("last_accessed", "unknown")
        print(f" [{last_access[:10]}] {text}...")

    return expired, stale
|
||||
|
||||
def cleanup_memories(dry_run=True):
    """Remove expired and very stale memories.

    Deletes every expired memory, plus low-importance memories not
    accessed in 180+ days. With dry_run=True (the default) nothing is
    deleted; candidates are only printed.

    Returns the number of memories actually deleted (0 on a dry run).
    """
    print("🧹 Memory Cleanup")
    print("=" * 40)

    if dry_run:
        print("(DRY RUN - no actual deletions)")

    deleted = 0

    def _purge(memories):
        # Shared deletion loop for both the expired and stale batches
        # (was duplicated verbatim in the original).
        nonlocal deleted
        for memory in memories:
            point_id = memory["id"]
            text = memory["payload"].get("text", "")[:40]
            if dry_run:
                print(f" [would delete] {text}...")
            elif delete_memory(point_id):
                print(f" ✅ Deleted: {text}...")
                deleted += 1
            else:
                print(f" ❌ Failed: {text}...")

    expired = get_expired_memories()
    print(f"\nDeleting {len(expired)} expired memories...")
    _purge(expired)

    # Only delete very stale (180 days) low-importance memories
    very_stale = get_stale_memories(180)
    print(f"\nDeleting {len(very_stale)} very stale (180+ days) low-importance memories...")
    _purge(very_stale)

    if dry_run:
        print(f"\n⚠️ This was a dry run. Use --no-dry-run to actually delete.")
    else:
        print(f"\n✅ Deleted {deleted} memories")

    return deleted
|
||||
|
||||
if __name__ == "__main__":
    # CLI: check (report + exit code), cleanup (delete, dry-run by default),
    # or status (summary counts only).
    parser = argparse.ArgumentParser(description="Memory decay management")
    parser.add_argument("action", choices=["check", "cleanup", "status"])
    parser.add_argument("--no-dry-run", action="store_true", help="Actually delete (default is dry run)")
    parser.add_argument("--days", type=int, default=90, help="Days for stale threshold")

    args = parser.parse_args()

    if args.action == "check":
        expired, stale = check_decay()
        total = len(expired) + len(stale)
        print(f"\n📊 Total decayed memories: {total}")
        # Non-zero exit signals "there is decay to handle" to schedulers.
        sys.exit(0 if total == 0 else 1)

    elif args.action == "cleanup":
        deleted = cleanup_memories(dry_run=not args.no_dry_run)
        sys.exit(0)

    elif args.action == "status":
        # NOTE(review): check_decay always uses 90 days for staleness;
        # the --days flag only affects the label printed below.
        expired, stale = check_decay()
        print(f"\n📊 Decay Status")
        print(f" Expired: {len(expired)}")
        print(f" Stale ({args.days}+ days): {len(stale)}")
        print(f" Total decayed: {len(expired) + len(stale)}")
||||
207
skills/qdrant-memory/scripts/monitor_ollama_models.py
Executable file
207
skills/qdrant-memory/scripts/monitor_ollama_models.py
Executable file
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Monitor Ollama model library for 100B+ parameter models
|
||||
Only outputs/announces when there are significant new large models.
|
||||
Always exits with code 0 to prevent "exec failed" logs.
|
||||
Usage: monitor_ollama_models.py [--json]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import urllib.request
|
||||
import re
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
KB_COLLECTION = "knowledge_base"
|
||||
OLLAMA_LIBRARY_URL = "https://ollama.com/library"
|
||||
|
||||
LARGE_MODEL_TAGS = ["100b", "120b", "200b", "400b", "70b", "8x7b", "8x22b"]
|
||||
GOOD_FOR_OPENCLAW = ["code", "coding", "instruct", "chat", "reasoning", "llama", "qwen", "mistral", "deepseek", "gemma", "mixtral"]
|
||||
|
||||
def fetch_library():
    """Fetch the Ollama library page HTML; None on any error.

    The caller treats None as "skip this run", so all failures are
    swallowed deliberately.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    req = urllib.request.Request(OLLAMA_LIBRARY_URL, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=20) as response:
            return response.read().decode('utf-8', errors='ignore')
    except Exception:
        # Narrowed from a bare except: don't swallow SystemExit/KeyboardInterrupt.
        return None
|
||||
|
||||
def extract_models(html):
    """Scrape model entries from the Ollama library HTML.

    Returns a list of dicts with name/url/tags/description plus
    is_large, is_new, and good_for_openclaw flags. Regex-based HTML
    scraping: brittle against site redesigns by design (best effort).
    """
    models = []
    # Each model is an <a href="/library/NAME">...card markup...</a> block.
    model_blocks = re.findall(r'<a[^>]*href="/library/([^"]+)"[^>]*>(.*?)</a>', html, re.DOTALL)

    # Only look at the first 50 cards on the page.
    for model_name, block in model_blocks[:50]:
        model_info = {
            "name": model_name, "url": f"https://ollama.com/library/{model_name}",
            "is_large": False, "is_new": False, "tags": [], "description": ""
        }

        # Parameter-size badges end in "b"/"B" (e.g. "70b", "8x22B").
        tag_matches = re.findall(r'<span[^>]*>([^<]+(?:b|B))</span>', block)
        model_info["tags"] = [t.lower() for t in tag_matches]

        for tag in model_info["tags"]:
            if any(large_tag in tag for large_tag in LARGE_MODEL_TAGS):
                # Plain 70b models only count as "large" when they are
                # mixture-of-experts (8x...) or Mixtral variants.
                if "70b" in tag and not ("8x" in model_name.lower() or "mixtral" in model_name.lower()):
                    continue
                model_info["is_large"] = True
                break

        desc_match = re.search(r'<p[^>]*>([^<]+)</p>', block)
        if desc_match:
            model_info["description"] = desc_match.group(1).strip()

        # "N hours ago" within 24h, or "N days ago" within 2 days => new.
        updated_match = re.search(r'(\d+)\s+(hours?|days?)\s+ago', block, re.IGNORECASE)
        if updated_match:
            num = int(updated_match.group(1))
            unit = updated_match.group(2).lower()
            if (unit.startswith("hour") and num <= 24) or (unit.startswith("day") and num <= 2):
                model_info["is_new"] = True

        # Keyword heuristic: relevant if name or description mentions
        # any of the GOOD_FOR_OPENCLAW terms.
        desc_lower = model_info["description"].lower()
        name_lower = model_name.lower()
        model_info["good_for_openclaw"] = any(kw in desc_lower or kw in name_lower for kw in GOOD_FOR_OPENCLAW)

        models.append(model_info)
    return models
|
||||
|
||||
def get_embedding(text):
    """Embed up to 500 chars of *text* via nomic-embed-text; None on failure."""
    data = {"model": "nomic-embed-text", "input": text[:500]}
    req = urllib.request.Request("http://10.0.0.10:11434/api/embed",
                                 data=json.dumps(data).encode(),
                                 headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
            # /api/embed returns {"embeddings": [[...]]}; take the first vector.
            return result.get("embeddings", [None])[0]
    except Exception:
        # Narrowed from a bare except: don't swallow SystemExit/KeyboardInterrupt.
        return None
|
||||
|
||||
def search_kb_for_model(model_name):
    """Scroll the KB for existing entries whose path mentions *model_name*.

    Returns up to 100 matching points, or [] on any error (the caller
    treats an empty result as "not yet stored").
    """
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/scroll"
    data = {"limit": 100, "with_payload": True, "filter": {"must": [
        {"key": "domain", "match": {"value": "AI/LLM"}},
        {"key": "path", "match": {"text": model_name}}
    ]}}
    req = urllib.request.Request(url, data=json.dumps(data).encode(),
                                 headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            return result.get("result", {}).get("points", [])
    except Exception:
        # Narrowed from a bare except.
        return []
|
||||
|
||||
def store_model(model_info):
    """Embed and upsert one Ollama model record into the KB; True on success."""
    import uuid
    text = f"{model_info['name']}: {model_info['description']}\nTags: {', '.join(model_info['tags'])}"
    embedding = get_embedding(text)
    if not embedding:
        return False

    # Payload follows the KB's metadata conventions (domain/path/subjects/...).
    metadata = {
        "domain": "AI/LLM", "path": f"AI/LLM/Ollama/Models/{model_info['name']}",
        "subjects": ["ollama", "models", "llm", "100b+"] + model_info['tags'],
        "category": "reference", "content_type": "web_page",
        "title": f"Ollama Model: {model_info['name']}", "source_url": model_info['url'],
        "date_added": datetime.now().strftime("%Y-%m-%d"), "date_scraped": datetime.now().isoformat(),
        "model_tags": model_info['tags'], "is_large": model_info['is_large'], "is_new": model_info['is_new'],
        "text_preview": text[:300]
    }

    point = {"id": str(uuid.uuid4()), "vector": embedding, "payload": metadata}
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points"
    req = urllib.request.Request(url, data=json.dumps({"points": [point]}).encode(),
                                 headers={"Content-Type": "application/json"}, method="PUT")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            return result.get("status") == "ok"
    except Exception:
        # Narrowed from a bare except.
        return False
|
||||
|
||||
def evaluate_candidate(model_info):
    """Score a model as a replacement candidate.

    Returns {"is_candidate", "score", "reasons"}; only large (100B+)
    models can be candidates, with bonus points for OpenClaw fit and
    recency.
    """
    if not model_info["is_large"]:
        # Small models are never candidates.
        return {"is_candidate": False, "score": 0, "reasons": []}

    # Being large alone is worth 5 points -- enough to qualify.
    score = 5
    reasons = ["🦣 100B+ parameters"]

    if model_info.get("good_for_openclaw"):
        score += 2
        reasons.append("✨ Good for OpenClaw")

    if model_info["is_new"]:
        score += 2
        reasons.append("🆕 Recently updated")

    return {"is_candidate": score >= 5, "score": score, "reasons": reasons}
|
||||
|
||||
def format_notification(candidates):
    """Build the multi-line alert message for new large-model candidates."""
    out = [
        "🤖 New Large Model Alert (100B+)",
        f"📅 {datetime.now().strftime('%Y-%m-%d')}",
        "",
        f"📊 {len(candidates)} new large model(s) found:",
        "",
    ]

    # Cap the notification at the first five candidates.
    for entry in candidates[:5]:
        out.append(f"• {entry['name']}")
        out.append(f" {entry['description'][:60]}...")
        out.append(f" Tags: {', '.join(entry['tags'][:3])}")
        out.extend(f" {why}" for why in entry["evaluation"]["reasons"])
        out.append(f" 🔗 {entry['url']}")
        out.append("")

    out.append("💡 Potential gpt-oss:120b replacement")
    return "\n".join(out)
|
||||
|
||||
def main():
    """Scan the Ollama library, store new large models in the KB, and
    print a notification only when new candidates appear.

    Always exits 0 so cron wrappers never log "exec failed".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    html = fetch_library()
    if not html:
        if args.json:
            print("{}")
        sys.exit(0)  # Silent fail with exit 0

    models = extract_models(html)
    large_models = [m for m in models if m["is_large"]]

    candidates = []

    for model in large_models:
        # "New to KB" means no existing KB entry mentions this model name.
        existing = search_kb_for_model(model["name"])
        is_new_to_kb = len(existing) == 0

        evaluation = evaluate_candidate(model)
        model["evaluation"] = evaluation

        # Store every newly-seen large model, candidate or not.
        if is_new_to_kb:
            store_model(model)

        if evaluation["is_candidate"] and is_new_to_kb:
            candidates.append(model)

    # Output results
    if args.json:
        if candidates:
            print(json.dumps({"candidates": candidates, "notification": format_notification(candidates)}))
        else:
            print("{}")
    elif candidates:
        print(format_notification(candidates))
    # No output if no candidates (silent)

    # Always exit 0 to prevent "exec failed" logs
    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
249
skills/qdrant-memory/scripts/monitor_openclaw_repo.py
Executable file
249
skills/qdrant-memory/scripts/monitor_openclaw_repo.py
Executable file
@@ -0,0 +1,249 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Monitor OpenClaw GitHub repo for relevant updates
|
||||
Only outputs/announces when there are significant changes affecting our setup.
|
||||
Always exits with code 0 to prevent "exec failed" logs.
|
||||
Usage: monitor_openclaw_repo.py [--json]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import urllib.request
|
||||
import re
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
KB_COLLECTION = "knowledge_base"
|
||||
|
||||
# Keywords that indicate relevance to our setup
|
||||
RELEVANT_KEYWORDS = [
|
||||
"ollama", "model", "embedding", "llm", "ai",
|
||||
"telegram", "webchat", "signal", "discord",
|
||||
"skill", "skills", "qdrant", "memory", "search",
|
||||
"whisper", "tts", "voice", "cron",
|
||||
"gateway", "agent", "session", "vector",
|
||||
"browser", "exec", "read", "edit", "write",
|
||||
"breaking", "deprecated", "removed", "changed",
|
||||
"fix", "bug", "patch", "security", "vulnerability"
|
||||
]
|
||||
|
||||
HIGH_PRIORITY_AREAS = [
|
||||
"ollama", "telegram", "qdrant", "memory", "skills",
|
||||
"voice", "cron", "gateway", "browser"
|
||||
]
|
||||
|
||||
def fetch_github_api(url):
    """GET a GitHub REST API endpoint; returns parsed JSON or None on any error."""
    headers = {
        'User-Agent': 'OpenClaw-KB-Monitor',
        'Accept': 'application/vnd.github.v3+json'
    }
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=20) as response:
            return json.loads(response.read().decode())
    except Exception:
        # Best-effort fetch: the original bound the exception to an unused
        # variable; network/HTTP/JSON errors all degrade to None.
        return None
|
||||
|
||||
def fetch_github_html(url):
    """Fetch a page and return its visible text, capped at 5000 chars.

    Scripts, styles, and all tags are stripped; whitespace is collapsed.
    Returns None on any fetch error.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=20) as response:
            html = response.read().decode('utf-8', errors='ignore')
            # Remove script/style bodies before stripping remaining tags.
            text = re.sub(r'<script[^>]*>.*?</script>', ' ', html, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<style[^>]*>.*?</style>', ' ', text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<[^>]+>', ' ', text)
            text = re.sub(r'\s+', ' ', text).strip()
            return text[:5000]
    except Exception:
        # Narrowed from a bare except: don't swallow SystemExit/KeyboardInterrupt.
        return None
|
||||
|
||||
def get_embedding(text):
    """Embed up to 1000 chars of *text* via the Ollama /api/embed endpoint.

    Returns the embedding vector, or None on any error.
    """
    import json as jsonlib
    data = {"model": "nomic-embed-text", "input": text[:1000]}
    req = urllib.request.Request(
        "http://10.0.0.10:11434/api/embed",
        data=jsonlib.dumps(data).encode(),
        headers={"Content-Type": "application/json"},
        method="POST"
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = jsonlib.loads(response.read().decode())
            # /api/embed returns {"embeddings": [[...]]}; take the first vector.
            return result.get("embeddings", [None])[0]
    except Exception:
        # Narrowed from a bare except.
        return None
|
||||
|
||||
def search_kb_by_path(path_prefix):
    """Return every KB point whose payload "path" starts with *path_prefix*.

    The original version fetched a single scroll page of 100 points, so
    dedup checks silently missed anything beyond that; this version pages
    through the scroll API via ``next_page_offset``.  Returns [] on error.
    """
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/scroll"
    matches = []
    offset = None
    try:
        while True:
            data = {"limit": 100, "with_payload": True}
            if offset is not None:
                data["offset"] = offset
            req = urllib.request.Request(url, data=json.dumps(data).encode(),
                                         headers={"Content-Type": "application/json"}, method="POST")
            with urllib.request.urlopen(req, timeout=10) as response:
                result = json.loads(response.read().decode())
            points = result.get("result", {}).get("points", [])
            matches.extend(p for p in points
                           if p.get("payload", {}).get("path", "").startswith(path_prefix))
            # Qdrant returns the next offset, or null when exhausted.
            offset = result.get("result", {}).get("next_page_offset")
            if offset is None:
                break
        return matches
    except Exception:
        # Narrowed from a bare `except:`; best-effort lookup.
        return []
|
||||
|
||||
def store_in_kb(text, metadata):
    """Embed *text* and upsert it, with enriched *metadata*, into the KB.

    Mutates *metadata* in place, adding a truncated checksum, a scrape
    timestamp, and a short preview.  Returns True when Qdrant acknowledges
    the write, None when embedding failed, False on a storage error
    (preserving the original tri-state contract).
    """
    import uuid
    embedding = get_embedding(text)
    if not embedding:
        return None
    # Truncated digest — must stay in sync with check_and_update()'s format.
    metadata["checksum"] = f"sha256:{hashlib.sha256(text.encode()).hexdigest()[:16]}"
    metadata["date_scraped"] = datetime.now().isoformat()
    metadata["text_preview"] = text[:300] + "..." if len(text) > 300 else text
    point = {"id": str(uuid.uuid4()), "vector": embedding, "payload": metadata}
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points"
    req = urllib.request.Request(url, data=json.dumps({"points": [point]}).encode(),
                                 headers={"Content-Type": "application/json"}, method="PUT")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            return result.get("status") == "ok"
    except Exception:
        # Narrowed from a bare `except:`.
        return False
|
||||
|
||||
def delete_kb_entry(entry_id):
    """Delete a single point from the KB collection by its Qdrant id.

    Returns True when Qdrant acknowledges the delete, False otherwise.
    """
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/delete"
    data = {"points": [entry_id]}
    req = urllib.request.Request(url, data=json.dumps(data).encode(),
                                 headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            return result.get("status") == "ok"
    except Exception:
        # Narrowed from a bare `except:`; a failed delete just leaves a
        # stale entry behind.
        return False
|
||||
|
||||
def is_relevant_change(text):
    """Score *text* against the keyword and high-priority area lists.

    Matching is plain case-insensitive substring search.  High-priority
    hits count double toward the score.
    """
    lowered = text.lower()
    matched = [kw for kw in RELEVANT_KEYWORDS if kw in lowered]
    priority_hits = [area for area in HIGH_PRIORITY_AREAS if area in lowered]
    return {
        "relevant": bool(matched),
        "keywords": matched,
        "high_priority": priority_hits,
        "score": len(matched) + 2 * len(priority_hits),
    }
|
||||
|
||||
def evaluate_significance(changes):
    """Aggregate per-change analysis dicts into an overall verdict.

    A batch is significant when the summed scores reach 3, or when any
    change touched a high-priority area at all.
    """
    total = 0
    priority_hits = 0
    for change in changes:
        analysis = change["analysis"]
        total += analysis["score"]
        priority_hits += len(analysis["high_priority"])
    return {
        "significant": total >= 3 or priority_hits > 0,
        "total_score": total,
        "high_priority_count": priority_hits,
    }
|
||||
|
||||
def format_summary(changes, significance):
    """Render detected changes as a short, sectioned summary string.

    *significance* is accepted for interface compatibility; the summary
    itself is built from *changes* only.  At most three items are shown
    per section, with an overflow line for the rest.
    """
    lines = ["📊 OpenClaw Repo Update", f"📅 {datetime.now().strftime('%Y-%m-%d')}", ""]

    grouped = {}
    for change in changes:
        grouped.setdefault(change["section"], []).append(change)

    for section, items in grouped.items():
        lines.append(f"📁 {section}")
        for item in items[:3]:
            full_title = item["title"]
            shown = full_title if len(full_title) <= 50 else full_title[:50] + "..."
            lines.append(f"  • {shown}")
            priority = item["analysis"]["high_priority"]
            if priority:
                lines.append(f"    ⚠️ Affects: {', '.join(priority[:2])}")
        overflow = len(items) - 3
        if overflow > 0:
            lines.append(f"  ... and {overflow} more")
        lines.append("")

    return "\n".join(lines)
|
||||
|
||||
def scrape_all_sections():
    """Collect README text, recent releases and open issues for openclaw/openclaw.

    Returns a list of dicts with section/title/url/content keys (plus
    "published" for releases and "labels" for issues).  Sources that fail
    to fetch are silently skipped.
    """
    sections = []

    main_text = fetch_github_html("https://github.com/openclaw/openclaw")
    if main_text:
        sections.append({"section": "Main Repo", "title": "openclaw/openclaw README",
                         "url": "https://github.com/openclaw/openclaw", "content": main_text})

    releases = fetch_github_api("https://api.github.com/repos/openclaw/openclaw/releases?per_page=5")
    if releases:
        for release in releases:
            # Fix: the API returns "body": null for releases without notes,
            # which made the old `release.get("body", "")[:2000]` raise
            # TypeError ('NoneType' is not subscriptable).
            sections.append({"section": "Release", "title": release.get("name", release.get("tag_name", "Unknown")),
                             "url": release.get("html_url", ""), "content": (release.get("body") or "")[:2000],
                             "published": release.get("published_at", "")})

    issues = fetch_github_api("https://api.github.com/repos/openclaw/openclaw/issues?state=open&per_page=5")
    if issues:
        for issue in issues:
            # The issues endpoint also returns pull requests; skip those.
            if "pull_request" not in issue:
                sections.append({"section": "Issue", "title": issue.get("title", "Unknown"),
                                 "url": issue.get("html_url", ""), "content": issue.get("body", "")[:1500] if issue.get("body") else "No description",
                                 "labels": [l.get("name", "") for l in issue.get("labels", [])]})
    return sections
|
||||
|
||||
def check_and_update():
    """Scrape the OpenClaw GitHub sources, diff them against the KB, and
    store anything new.

    Returns (result_dict, None) when significant changes were found, or
    (None, reason_string) otherwise.
    """
    sections = scrape_all_sections()
    if not sections:
        return None, "No data scraped"

    # Checksums of previously-stored entries let us skip unchanged content.
    existing_entries = search_kb_by_path("OpenClaw/GitHub")
    existing_checksums = {e.get("payload", {}).get("checksum", ""): e for e in existing_entries}
    changes_detected = []

    for section in sections:
        content = section["content"]
        if not content:
            continue
        # Truncated digest — same format store_in_kb writes into payloads.
        checksum = f"sha256:{hashlib.sha256(content.encode()).hexdigest()[:16]}"
        if checksum in existing_checksums:
            continue

        # New or changed content: score its relevance and record it.
        analysis = is_relevant_change(content + " " + section["title"])
        section["analysis"] = analysis
        section["checksum"] = checksum
        changes_detected.append(section)

        # Remove the stale KB entry with the same title (first match only)
        # before the fresh version is stored below.
        for old_checksum, old_entry in existing_checksums.items():
            if old_entry.get("payload", {}).get("title", "") == section["title"]:
                delete_kb_entry(old_entry.get("id"))
                break

        metadata = {
            "domain": "OpenClaw", "path": f"OpenClaw/GitHub/{section['section']}/{section['title'][:30]}",
            "subjects": ["openclaw", "github", section['section'].lower()], "category": "reference",
            "content_type": "web_page", "title": section["title"], "source_url": section["url"],
            "date_added": datetime.now().strftime("%Y-%m-%d")
        }
        store_in_kb(content, metadata)

    if changes_detected:
        significance = evaluate_significance(changes_detected)
        if significance["significant"]:
            return {"changes": changes_detected, "significance": significance,
                    "summary": format_summary(changes_detected, significance)}, None
        else:
            # Content was stored above either way; we just don't alert.
            return None, "Changes not significant"
    return None, "No changes detected"
|
||||
|
||||
def main():
    """CLI entry point; always exits 0 so cron never logs an exec failure."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--json", action="store_true")
    options = arg_parser.parse_args()

    result, reason = check_and_update()

    if options.json:
        # Emit JSON even when there is nothing to report (cron compatibility).
        print(json.dumps(result or {}))
    elif result:
        print(result["summary"])
    # Otherwise stay silent.

    sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
65
skills/qdrant-memory/scripts/notify_check.py
Executable file
65
skills/qdrant-memory/scripts/notify_check.py
Executable file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Lightweight notification checker for agent messages
|
||||
Cron job: Check Redis stream hourly, notify if new messages
|
||||
"""
|
||||
|
||||
import json
|
||||
import redis
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Connection details for the shared Redis instance carrying the
# cross-agent message stream.
REDIS_HOST = "10.0.0.36"
REDIS_PORT = 6379
STREAM_NAME = "agent-messages"
# Redis key holding the last stream ID we have already alerted on.
LAST_NOTIFIED_KEY = "agent:notifications:last_id"
|
||||
|
||||
# Simple stdout notification (OpenClaw captures stdout for alerts)
|
||||
def notify(messages):
    """Print a single-line alert about new agent messages.

    Does nothing when *messages* is empty.  OpenClaw captures stdout for
    alerts, so output is deliberately kept to one line (minimal tokens).
    The sender name is taken from the first message.
    """
    if not messages:
        return

    sender = messages[0].get("agent", "Agent")
    total = len(messages)
    print(f"📨 {sender}: {total} new message(s) in agent-messages")
|
||||
|
||||
def check_notifications():
    """Check the shared Redis stream for messages from the other agent.

    Reads everything after the last-notified stream ID, alerts on messages
    sent by anyone other than this agent, and advances the stored position
    either way so nothing is re-notified.
    """
    client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=True)

    # Resume from where we last alerted ("0" = start of stream).
    last_id = client.get(LAST_NOTIFIED_KEY) or "0"

    result = client.xread({STREAM_NAME: last_id}, block=100, count=100)
    if not result:
        # Nothing new: exit silently so the cron run produces no output.
        return

    messages = []
    new_last_id = last_id
    for _stream, entries in result:
        for entry_id, payload in entries:
            messages.append(payload)
            new_last_id = entry_id

    if messages:
        # Skip messages we sent ourselves; AGENT_NAME is set in the cron env.
        me = os.environ.get("AGENT_NAME", "Kimi")
        from_others = [m for m in messages if m.get("agent") != me]
        if from_others:
            notify(from_others)

        # Advance the pointer regardless of sender so nothing repeats.
        client.set(LAST_NOTIFIED_KEY, new_last_id)
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_notifications()
|
||||
220
skills/qdrant-memory/scripts/scrape_to_kb.py
Executable file
220
skills/qdrant-memory/scripts/scrape_to_kb.py
Executable file
@@ -0,0 +1,220 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Scrape web content and store in knowledge_base collection
|
||||
Usage: scrape_to_kb.py <url> <domain> <path> [--title "Title"] [--subjects "a,b,c"]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import re
|
||||
import hashlib
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from html import unescape
|
||||
|
||||
# Qdrant REST endpoint and target collection for scraped knowledge.
QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "knowledge_base"
# Ollama embedding endpoint (nomic-embed-text model).
OLLAMA_EMBED_URL = "http://10.0.0.10:11434/api/embed"
||||
|
||||
def fetch_url(url):
    """Download *url* and return the response body as text.

    Uses a browser-like User-Agent to avoid trivial bot blocking.
    Returns None (and logs to stderr) on any failure.
    """
    request = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'},
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            return response.read().decode('utf-8', errors='ignore')
    except Exception as e:
        print(f"❌ Error fetching {url}: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def extract_text(html):
    """Reduce an HTML document to (title, cleaned plain text)."""

    def drop(tag_pattern, source):
        # Remove whole tagged regions, case-insensitively, across newlines.
        return re.sub(tag_pattern, ' ', source, flags=re.DOTALL | re.IGNORECASE)

    # Scripts and styles carry no readable content.
    html = drop(r'<script[^>]*>.*?</script>', html)
    html = drop(r'<style[^>]*>.*?</style>', html)

    # Pull the page title before all tags are stripped.
    match = re.search(r'<title[^>]*>([^<]*)</title>', html, re.IGNORECASE)
    title = unescape(match.group(1).strip() if match else "Untitled")

    # Drop common page chrome.
    for tag in ('nav', 'header', 'footer'):
        html = drop(rf'<{tag}[^>]*>.*?</{tag}>', html)

    # Turn block-level boundaries into newlines so structure survives.
    html = re.sub(r'</(p|div|h[1-6]|li|tr)>', '\n', html, flags=re.IGNORECASE)
    html = re.sub(r'<br\s*/?>', '\n', html, flags=re.IGNORECASE)

    # Strip remaining markup, decode entities, then normalise whitespace.
    text = unescape(re.sub(r'<[^>]+>', ' ', html))
    text = re.sub(r'\n\s*\n', '\n\n', text)
    text = re.sub(r'[ \t]+', ' ', text)
    stripped = (line.strip() for line in text.split('\n'))
    text = '\n'.join(line for line in stripped if line)

    return title, text
|
||||
|
||||
def chunk_text(text, max_chars=2000, overlap=200):
    """Split *text* into overlapping chunks of at most *max_chars* chars.

    Prefers to cut at a paragraph break, then at a sentence boundary, as
    long as the cut keeps more than 500 chars in the chunk.  Chunks of
    100 chars or fewer (after stripping) are dropped.  Consecutive chunks
    share *overlap* characters of context.
    """
    pieces = []
    pos = 0

    while pos < len(text):
        cut = pos + max_chars

        if cut < len(text):
            # Prefer a paragraph break, then a sentence-ending mark.
            paragraph = text.rfind('\n\n', pos, cut)
            if paragraph > pos + 500:
                cut = paragraph
            else:
                sentence = max(text.rfind(mark, pos, cut)
                               for mark in ('. ', '? ', '! '))
                if sentence > pos + 500:
                    cut = sentence + 1  # keep the punctuation in the chunk

        piece = text[pos:cut].strip()
        if len(piece) > 100:
            pieces.append(piece)

        pos = cut - overlap
        if pos >= len(text):
            break

    return pieces
|
||||
|
||||
def get_embedding(text):
    """Ask the local Ollama server for an embedding of *text*.

    Returns the embedding vector, or None (logging to stderr) on failure.
    """
    import json

    payload = json.dumps({"model": "nomic-embed-text", "input": text}).encode()
    request = urllib.request.Request(
        OLLAMA_EMBED_URL,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            reply = json.loads(response.read().decode())
            return reply.get("embeddings", [None])[0]
    except Exception as e:
        print(f"❌ Error generating embedding: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def compute_checksum(text):
    """Return the full SHA-256 digest of *text*, prefixed with 'sha256:'."""
    digest = hashlib.sha256(text.encode())
    return "sha256:" + digest.hexdigest()
|
||||
|
||||
def store_in_kb(text, metadata):
    """Embed *text* and upsert one point into the knowledge_base collection.

    Returns True when Qdrant acknowledges the write, False otherwise
    (including when the embedding itself could not be generated).
    """
    import json
    import uuid

    embedding = get_embedding(text)
    if not embedding:
        return False

    body = json.dumps({
        "points": [{
            "id": str(uuid.uuid4()),
            "vector": embedding,
            "payload": metadata,
        }]
    }).encode()

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points",
        data=body,
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            return reply.get("status") == "ok"
    except Exception as e:
        print(f"❌ Error storing: {e}", file=sys.stderr)
        return False
|
||||
|
||||
def main():
    """CLI entry point: fetch a URL, clean it, chunk it, store it in Qdrant.

    Exits 1 when the fetch fails or the extracted text is too short to be
    worth storing.
    """
    from datetime import date  # file has no top-level datetime import

    parser = argparse.ArgumentParser(description="Scrape URL to knowledge base")
    parser.add_argument("url", help="URL to scrape")
    parser.add_argument("domain", help="Knowledge domain (e.g., Python, OpenClaw)")
    parser.add_argument("path", help="Hierarchical path (e.g., OpenClaw/Docs/Overview)")
    parser.add_argument("--title", help="Override title")
    parser.add_argument("--subjects", help="Comma-separated subjects")
    parser.add_argument("--category", default="reference", help="Category: reference|tutorial|snippet|troubleshooting|concept")
    parser.add_argument("--content-type", default="web_page", help="Content type: web_page|code|markdown|pdf|note")

    args = parser.parse_args()

    print(f"🔍 Fetching {args.url}...")
    html = fetch_url(args.url)
    if not html:
        sys.exit(1)

    print("✂️ Extracting text...")
    title, text = extract_text(html)
    if args.title:
        title = args.title

    print(f"📄 Title: {title}")
    print(f"📝 Content length: {len(text)} chars")

    if len(text) < 200:
        print("❌ Content too short, skipping", file=sys.stderr)
        sys.exit(1)

    print("🧩 Chunking...")
    chunks = chunk_text(text)
    print(f"  {len(chunks)} chunks")

    subjects = [s.strip() for s in args.subjects.split(",")] if args.subjects else []
    checksum = compute_checksum(text)
    # Fix: was a hard-coded "2026-02-05"; record the real scrape date.
    date_added = date.today().isoformat()

    print("💾 Storing chunks...")
    stored = 0
    for i, chunk in enumerate(chunks):
        chunk_metadata = {
            "domain": args.domain,
            "path": f"{args.path}/chunk-{i+1}",
            "subjects": subjects,
            "category": args.category,
            "content_type": args.content_type,
            "title": f"{title} (part {i+1}/{len(chunks)})",
            "checksum": checksum,  # checksum of the whole document, not the chunk
            "source_url": args.url,
            "date_added": date_added,
            "chunk_index": i + 1,
            "total_chunks": len(chunks),
            "text_preview": chunk[:200] + "..." if len(chunk) > 200 else chunk
        }

        if store_in_kb(chunk, chunk_metadata):
            stored += 1
            print(f"  ✓ Chunk {i+1}/{len(chunks)}")
        else:
            print(f"  ✗ Chunk {i+1}/{len(chunks)} failed")

    print(f"\n🎉 Stored {stored}/{len(chunks)} chunks in knowledge_base")
    print(f"   Domain: {args.domain}")
    print(f"   Path: {args.path}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
187
skills/qdrant-memory/scripts/search_memories.py
Executable file
187
skills/qdrant-memory/scripts/search_memories.py
Executable file
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Search memories by semantic similarity in Qdrant
|
||||
Usage: search_memories.py "Query text" [--limit 5] [--filter-tag tag] [--track-access]
|
||||
|
||||
Now with access tracking - updates access_count and last_accessed when memories are retrieved.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime
|
||||
|
||||
# Qdrant instance holding the agent's memory collection.
QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "kimi_memories"
# Ollama OpenAI-compatible API base (used for the /embeddings endpoint).
OLLAMA_URL = "http://10.0.0.10:11434/v1"
||||
|
||||
def get_embedding(text):
    """Embed *text* with snowflake-arctic-embed2 via Ollama's OpenAI API.

    Input is truncated to 8192 chars (presumably the model's input limit
    — confirm against the model card).  Returns the vector, or None.
    """
    body = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],
    }).encode()

    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=body,
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            reply = json.loads(response.read().decode())
            # OpenAI-style response shape: {"data": [{"embedding": [...]}]}
            return reply["data"][0]["embedding"]
    except Exception as e:
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def update_access_stats(point_id, current_payload):
    """Bump access_count and refresh last_accessed on a stored memory.

    Failures are swallowed deliberately (returning False) so a stats
    update can never break the search that triggered it.
    """
    patch = {
        "points": [{
            "id": point_id,
            "payload": {
                "access_count": current_payload.get("access_count", 0) + 1,
                "last_accessed": datetime.now().isoformat(),
            },
        }]
    }

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/payload?wait=true",
        data=json.dumps(patch).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT",
    )
    try:
        with urllib.request.urlopen(request, timeout=5) as response:
            reply = json.loads(response.read().decode())
            return reply.get("status") == "ok"
    except Exception:
        # Silent best-effort: search results were already obtained.
        return False
|
||||
|
||||
def search_memories(query_vector, limit=5, tag_filter=None, track_access=True):
    """Vector-search the memory collection, optionally recording access stats.

    Returns the raw Qdrant hit list (with payloads, without vectors), or
    [] on any error.
    """
    body = {
        "vector": query_vector,
        "limit": limit,
        "with_payload": True,
        "with_vector": False,
    }
    if tag_filter:
        # Restrict results to memories carrying the given tag.
        body["filter"] = {
            "must": [{"key": "tags", "match": {"value": tag_filter}}]
        }

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points/search",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            reply = json.loads(response.read().decode())
            hits = reply.get("result", [])

            if track_access and hits:
                # Every retrieval counts as an access on the stored memory.
                for hit in hits:
                    hit_id = hit.get("id")
                    if hit_id:
                        update_access_stats(hit_id, hit.get("payload", {}))

            return hits
    except Exception as e:
        print(f"Error searching memories: {e}", file=sys.stderr)
        return []
|
||||
|
||||
if __name__ == "__main__":
    # CLI: embed the query, search Qdrant, print results (human or JSON).
    parser = argparse.ArgumentParser(description="Search memories by semantic similarity")
    parser.add_argument("query", help="Search query text")
    parser.add_argument("--limit", type=int, default=5, help="Number of results (default: 5)")
    parser.add_argument("--filter-tag", help="Filter by tag")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--no-track", action="store_true", help="Don't update access stats")

    args = parser.parse_args()

    # Progress goes to stderr so stdout stays machine-parseable with --json.
    print(f"Generating query embedding...", file=sys.stderr)
    query_vector = get_embedding(args.query)

    if query_vector is None:
        print("❌ Failed to generate embedding", file=sys.stderr)
        sys.exit(1)

    print(f"Searching Qdrant...", file=sys.stderr)
    results = search_memories(query_vector, args.limit, args.filter_tag, track_access=not args.no_track)

    if not results:
        print("No matching memories found.")
        sys.exit(0)

    if args.json:
        # JSON output with all metadata
        output = []
        for r in results:
            payload = r["payload"]
            # Missing payload fields fall back to documented defaults.
            output.append({
                "id": r.get("id"),
                "score": r["score"],
                "text": payload.get("text", ""),
                "date": payload.get("date", ""),
                "tags": payload.get("tags", []),
                "importance": payload.get("importance", "medium"),
                "confidence": payload.get("confidence", "medium"),
                "verified": payload.get("verified", False),
                "source_type": payload.get("source_type", "inferred"),
                "access_count": payload.get("access_count", 0),
                "last_accessed": payload.get("last_accessed", ""),
                "expires_at": payload.get("expires_at", None)
            })
        print(json.dumps(output, indent=2))
    else:
        # Human-readable output
        print(f"\n🔍 Found {len(results)} similar memories:\n")
        for i, r in enumerate(results, 1):
            payload = r["payload"]
            score = r["score"]
            # Truncate long memories to a 200-char preview.
            text = payload.get("text", "")[:200]
            if len(payload.get("text", "")) > 200:
                text += "..."
            date = payload.get("date", "unknown")
            tags = ", ".join(payload.get("tags", []))
            importance = payload.get("importance", "medium")
            access_count = payload.get("access_count", 0)
            # "✓" marks verified memories, "?" unverified ones.
            verified = "✓" if payload.get("verified", False) else "?"

            print(f"{i}. [{date}] (score: {score:.3f}) [{importance}] {verified}")
            print(f"   {text}")
            if tags:
                print(f"   Tags: {tags}")
            if access_count > 0:
                print(f"   Accessed: {access_count} times")
            print()
|
||||
211
skills/qdrant-memory/scripts/smart_parser.py
Executable file
211
skills/qdrant-memory/scripts/smart_parser.py
Executable file
@@ -0,0 +1,211 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Smart Parser - BeautifulSoup with CSS selectors for custom extraction
|
||||
Usage: smart_parser.py <url> --selector "article .content" --domain "Blog" --path "Tech/AI"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from bs4 import BeautifulSoup
|
||||
import urllib.request
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from scrape_to_kb import chunk_text, get_embedding, compute_checksum, store_in_kb, fetch_url
|
||||
|
||||
# Qdrant endpoint and collection (mirrors the scrape_to_kb configuration).
QDRANT_URL = "http://10.0.0.40:6333"
COLLECTION_NAME = "knowledge_base"
|
||||
|
||||
def parse_with_selectors(html, selectors):
    """Extract content from *html* using named CSS selectors.

    Reserved selector names (all start with "_"):
      _content     - main-content selector
      _title       - title override selector
      _remove      - elements stripped from the soup before extraction
      _code_blocks - code elements collected into metadata["code_blocks"]
      _links       - anchors collected into metadata["links"]
    Any other name produces a named entry in results["sections"].
    """
    soup = BeautifulSoup(html, 'lxml')

    # Fix: the "_remove" selector built by main() was previously ignored.
    # Strip those elements first so they never leak into any extraction.
    if selectors.get("_remove"):
        for unwanted in soup.select(selectors["_remove"]):
            unwanted.decompose()

    # Default: get title
    title_tag = soup.find('title')
    title = title_tag.get_text().strip() if title_tag else "Untitled"

    results = {
        "title": title,
        "content": "",
        "sections": [],
        "metadata": {}
    }

    for name, selector in selectors.items():
        if name == "_content":
            # Main content selector
            elements = soup.select(selector)
            if elements:
                results["content"] = "\n\n".join(el.get_text(separator='\n', strip=True) for el in elements)
        elif name == "_title":
            # Title override selector
            el = soup.select_one(selector)
            if el:
                results["title"] = el.get_text(strip=True)
        elif name.startswith("_"):
            # Special selectors ("_remove" was already handled above)
            if name == "_code_blocks":
                # Extract code separately; the language is guessed from the
                # first CSS class (e.g. "language-python" -> "python").
                code_blocks = soup.select(selector)
                # Fix: guard against a present-but-empty class list, which
                # made `el.get('class', [''])[0]` raise IndexError.
                results["metadata"]["code_blocks"] = [
                    {"lang": (el.get('class') or [''])[0].replace('language-', '').replace('lang-', ''),
                     "code": el.get_text()}
                    for el in code_blocks
                ]
            elif name == "_links":
                links = soup.select(selector)
                results["metadata"]["links"] = [
                    {"text": el.get_text(strip=True), "href": el.get('href')}
                    for el in links if el.get('href')
                ]
        else:
            # Named section
            elements = soup.select(selector)
            if elements:
                section_text = "\n\n".join(el.get_text(separator='\n', strip=True) for el in elements)
                results["sections"].append({"name": name, "content": section_text})

    # If no content selector matched, try to auto-extract main content
    if not results["content"]:
        # Try common content selectors
        for sel in ['main', 'article', '[role="main"]', '.content', '.post', '.entry', '#content']:
            el = soup.select_one(sel)
            if el:
                # Remove nav/footer from content
                for unwanted in el.find_all(['nav', 'footer', 'aside', 'header']):
                    unwanted.decompose()
                results["content"] = el.get_text(separator='\n', strip=True)
                break

    # Fallback: body minus nav/header/footer
    if not results["content"]:
        body = soup.find('body')
        if body:
            for unwanted in body.find_all(['nav', 'header', 'footer', 'aside', 'script', 'style']):
                unwanted.decompose()
            results["content"] = body.get_text(separator='\n', strip=True)

    return results
|
||||
|
||||
def format_extracted(data, include_sections=True):
    """Assemble parse_with_selectors() output into one markdown-ish string."""
    parts = [f"# {data['title']}\n"]

    if data["content"]:
        parts.append(data["content"])

    if include_sections:
        for section in data["sections"]:
            parts.append(f"\n## {section['name']}\n")
            parts.append(section["content"])

    code_blocks = data["metadata"].get("code_blocks")
    if code_blocks:
        parts.append("\n\n## Code Examples\n")
        for cb in code_blocks:
            language = cb["lang"] or "text"  # unlabeled blocks render as "text"
            parts.append(f"\n```{language}\n{cb['code']}\n```\n")

    return "\n".join(parts)
|
||||
|
||||
def main():
    """CLI entry point: fetch, parse with CSS selectors, chunk, store in the KB.

    With --output the formatted text is written to a file and nothing is
    stored.  Exits 1 on fetch failure or when extracted text is too short.
    """
    from datetime import date  # file has no top-level datetime import

    parser = argparse.ArgumentParser(description="Smart HTML parser with CSS selectors")
    parser.add_argument("url", help="URL to parse")
    parser.add_argument("--domain", required=True, help="Knowledge domain")
    parser.add_argument("--path", required=True, help="Hierarchical path")
    parser.add_argument("--selector", "-s", action='append', nargs=2, metavar=('NAME', 'CSS'),
                        help="CSS selector (e.g., -s content article -s title h1)")
    parser.add_argument("--content-only", action="store_true", help="Only extract main content")
    parser.add_argument("--title-selector", help="CSS selector for title")
    parser.add_argument("--remove", action='append', help="Selectors to remove")
    parser.add_argument("--category", default="reference")
    parser.add_argument("--content-type", default="web_page")
    parser.add_argument("--subjects", help="Comma-separated subjects")
    parser.add_argument("--title", help="Override title")
    parser.add_argument("--output", "-o", help="Save to file instead of KB")

    args = parser.parse_args()

    # Build selectors dict (reserved "_" names are handled by the parser).
    selectors = {}
    if args.selector:
        for name, css in args.selector:
            selectors[name] = css

    if args.content_only:
        selectors["_content"] = "main, article, [role='main'], .content, .post, .entry, #content, body"

    if args.title_selector:
        selectors["_title"] = args.title_selector

    if args.remove:
        selectors["_remove"] = ", ".join(args.remove)

    print(f"🔍 Fetching {args.url}...")
    html = fetch_url(args.url)
    if not html:
        sys.exit(1)

    print("🔧 Parsing...")
    data = parse_with_selectors(html, selectors)

    if args.title:
        data["title"] = args.title

    text = format_extracted(data)

    print(f"📄 Title: {data['title']}")
    print(f"📝 Content: {len(text)} chars")
    print(f"📊 Sections: {len(data['sections'])}")

    if args.output:
        with open(args.output, 'w') as f:
            f.write(text)
        print(f"💾 Saved to {args.output}")
        return

    if len(text) < 200:
        print("❌ Content too short", file=sys.stderr)
        sys.exit(1)

    chunks = chunk_text(text)
    print(f"🧩 Chunks: {len(chunks)}")

    subjects = [s.strip() for s in args.subjects.split(",")] if args.subjects else []
    checksum = compute_checksum(text)
    # Fix: was a hard-coded "2026-02-05"; record the real scrape date.
    date_added = date.today().isoformat()

    print("💾 Storing...")
    stored = 0
    for i, chunk in enumerate(chunks):
        chunk_metadata = {
            "domain": args.domain,
            "path": f"{args.path}/chunk-{i+1}",
            "subjects": subjects,
            "category": args.category,
            "content_type": args.content_type,
            "title": f"{data['title']} (part {i+1}/{len(chunks)})",
            "checksum": checksum,  # checksum of the whole document
            "source_url": args.url,
            "date_added": date_added,
            "chunk_index": i + 1,
            "total_chunks": len(chunks),
            "text_preview": chunk[:200] + "..." if len(chunk) > 200 else chunk,
            "scraper_type": "smart_parser_bs4",
            "extracted_sections": [s["name"] for s in data["sections"]]
        }

        if store_in_kb(chunk, chunk_metadata):
            stored += 1
            print(f"  ✓ Chunk {i+1}")

    print(f"\n🎉 Stored {stored}/{len(chunks)} chunks")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
321
skills/qdrant-memory/scripts/smart_search.py
Executable file
321
skills/qdrant-memory/scripts/smart_search.py
Executable file
@@ -0,0 +1,321 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Hybrid search: knowledge_base first, then web search, store new findings.
|
||||
Usage: smart_search.py "query" [--domain "Domain"] [--min-kb-score 0.5] [--store-new]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
OLLAMA_EMBED_URL = "http://10.0.0.10:11434/api/embed"
|
||||
SEARXNG_URL = "http://10.0.0.8:8888"
|
||||
KB_COLLECTION = "knowledge_base"
|
||||
|
||||
def get_embedding(text):
    """Generate an embedding for *text* via the Ollama embed API.

    Returns the embedding vector (list of floats) or None on any
    failure (network error, malformed response).
    """
    payload = json.dumps({
        "model": "nomic-embed-text",
        "input": text[:1000],  # truncate for speed
    }).encode()
    request = urllib.request.Request(
        OLLAMA_EMBED_URL,
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            body = json.loads(resp.read().decode())
            return body.get("embeddings", [None])[0]
    except Exception as e:
        print(f"⚠️ Embedding error: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def search_knowledge_base(query, domain=None, limit=5, min_score=0.5):
    """Vector-similarity search over the knowledge base.

    Returns the Qdrant hits whose score is at least *min_score*;
    an empty list when embedding or the search request fails.
    """
    query_vector = get_embedding(query)
    if not query_vector:
        return []

    body = {
        "vector": query_vector,
        "limit": limit,
        "with_payload": True,
    }

    # Note: score_threshold filters aggressively; we filter client-side instead
    # to show users what scores were returned
    if domain:
        body["filter"] = {"must": [{"key": "domain", "match": {"value": domain}}]}

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/search",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            hits = json.loads(resp.read().decode()).get("result", [])
            # Filter by min_score client-side
            return [hit for hit in hits if hit.get("score", 0) >= min_score]
    except Exception as e:
        print(f"⚠️ KB search error: {e}", file=sys.stderr)
        return []
|
||||
|
||||
def web_search(query, limit=5):
    """Query the local SearXNG instance; return up to *limit* result dicts."""
    search_url = (
        f"{SEARXNG_URL}/?q={urllib.parse.quote(query)}&format=json&safesearch=0"
    )

    try:
        request = urllib.request.Request(search_url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(request, timeout=15) as resp:
            payload = json.loads(resp.read().decode())
            return payload.get("results", [])[:limit]
    except Exception as e:
        print(f"⚠️ Web search error: {e}", file=sys.stderr)
        return []
|
||||
|
||||
def fetch_and_extract(url):
    """Download *url* and return (title, plain_text capped at 3000 chars).

    Returns (None, None) when the fetch or extraction fails.
    """
    request = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'},
    )

    try:
        with urllib.request.urlopen(request, timeout=20) as resp:
            html = resp.read().decode('utf-8', errors='ignore')

        # Extract title
        match = re.search(r'<title[^>]*>([^<]*)</title>', html, re.IGNORECASE)
        title = match.group(1).strip() if match else "Untitled"

        # Strip scripts/styles, then every remaining tag, then collapse runs
        # of whitespace into single spaces.
        for pattern in (r'<script[^>]*>.*?</script>', r'<style[^>]*>.*?</style>'):
            html = re.sub(pattern, ' ', html, flags=re.DOTALL | re.IGNORECASE)
        stripped = re.sub(r'<[^>]+>', ' ', html)
        text = re.sub(r'\s+', ' ', stripped).strip()

        return title, text[:3000]  # Limit content
    except Exception:
        return None, None
|
||||
|
||||
def is_substantial(text, min_length=500):
    """Return True when *text* has at least *min_length* characters."""
    return min_length <= len(text)
|
||||
|
||||
def is_unique_content(text, kb_results, similarity_threshold=0.8):
    """Return False when *text* heavily overlaps an existing KB preview.

    Overlap is measured as |shared words| / |KB preview words|; previews of
    100 characters or fewer are ignored as too short to compare reliably.
    """
    if not kb_results:
        return True

    # Word set of the candidate text (value is loop-invariant).
    candidate_words = set(text.lower().split())

    for hit in kb_results:
        preview = hit.get("payload", {}).get("text_preview", "").lower()
        if len(preview) <= 100:
            continue
        preview_words = set(preview.split())
        if not (preview_words and candidate_words):
            continue
        shared = len(preview_words & candidate_words)
        if shared / len(preview_words) > similarity_threshold:
            return False

    return True
|
||||
|
||||
def store_in_kb(text, metadata):
    """Embed *text* and upsert it into the knowledge_base collection.

    Mutates *metadata* in place, adding checksum / date_scraped /
    text_preview fields. Returns True on a Qdrant "ok" status.
    """
    import uuid
    import hashlib

    vector = get_embedding(text[:1000])
    if not vector:
        return False

    # Enrich the payload with provenance fields before storing.
    metadata["checksum"] = f"sha256:{hashlib.sha256(text.encode()).hexdigest()[:16]}"
    metadata["date_scraped"] = datetime.now().isoformat()
    metadata["text_preview"] = text[:300] + "..." if len(text) > 300 else text

    upsert_body = {
        "points": [{
            "id": str(uuid.uuid4()),
            "vector": vector,
            "payload": metadata,
        }]
    }

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{KB_COLLECTION}/points",
        data=json.dumps(upsert_body).encode(),
        headers={"Content-Type": "application/json"},
        method="PUT",
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            reply = json.loads(resp.read().decode())
            return reply.get("status") == "ok"
    except Exception as e:
        print(f"⚠️ Store error: {e}", file=sys.stderr)
        return False
|
||||
|
||||
def suggest_domain(query, title, content):
    """Suggest a KB domain for a search result.

    Scans the query, title, and the first 500 chars of content for known
    keywords and returns the first matching domain, or "General".

    Bug fix: keywords are now matched on word boundaries instead of raw
    substrings, so short keywords no longer fire inside longer words
    (previously "html" matched the "ml" keyword, "email" matched "ai",
    and "json" matched "js").
    """
    # Keyword mapping (first match wins, in insertion order)
    domains = {
        "Python": ["python", "pip", "django", "flask", "asyncio"],
        "JavaScript": ["javascript", "js", "node", "react", "vue", "angular"],
        "Linux": ["linux", "ubuntu", "debian", "systemd", "bash", "shell"],
        "Networking": ["network", "dns", "tcp", "http", "ssl", "vpn"],
        "Docker": ["docker", "container", "kubernetes", "k8s"],
        "AI/ML": ["ai", "ml", "machine learning", "llm", "gpt", "model"],
        "OpenClaw": ["openclaw"],
        "Database": ["database", "sql", "postgres", "mysql", "redis"],
        "Security": ["security", "encryption", "auth", "oauth", "jwt"],
        "DevOps": ["devops", "ci/cd", "github actions", "jenkins"]
    }

    combined = query.lower() + " " + title.lower() + " " + content[:500].lower()

    for domain, keywords in domains.items():
        for kw in keywords:
            # \b keeps short keywords ("ai", "ml", "js") from matching
            # inside longer words ("email", "html", "json").
            if re.search(rf"\b{re.escape(kw)}\b", combined):
                return domain

    return "General"
|
||||
|
||||
def main():
    """CLI entry point: search the KB first, then the web; optionally store
    substantial, unique web findings back into the KB.

    Exit behavior: returns early (after optional JSON dump) when the web
    search yields nothing; otherwise prints a summary at the end.
    """
    parser = argparse.ArgumentParser(description="Smart search: KB first, then web, store new")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--domain", help="Filter KB by domain")
    parser.add_argument("--min-kb-score", type=float, default=0.5, help="Minimum KB match score (default: 0.5)")
    parser.add_argument("--store-new", action="store_true", help="Automatically store new web findings")
    parser.add_argument("--web-limit", type=int, default=3, help="Number of web results to check")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    # Accumulator echoed as JSON when --json is given.
    results = {
        "query": args.query,
        "kb_results": [],
        "web_results": [],
        "stored_count": 0,
        "timestamp": datetime.now().isoformat()
    }

    # Step 1: Search knowledge base
    print(f"🔍 Searching knowledge base (min score: {args.min_kb_score})...")
    kb_results = search_knowledge_base(args.query, args.domain, limit=5, min_score=args.min_kb_score)
    results["kb_results"] = kb_results

    if kb_results:
        print(f" ✓ Found {len(kb_results)} KB entries")
        for r in kb_results:
            payload = r.get("payload", {})
            score = r.get("score", 0)
            title = payload.get('title', 'Untitled')[:50]
            source = payload.get('source_url', 'N/A')[:40]
            print(f" • {title}... (score: {score:.2f}) [{source}...]")
    else:
        print(f" ✗ No KB matches above threshold ({args.min_kb_score})")

    # Step 2: Web search
    print(f"\n🌐 Searching web...")
    web_results = web_search(args.query, limit=args.web_limit)
    results["web_results"] = web_results

    if not web_results:
        print(f" ✗ No web results")
        if args.json:
            print(json.dumps(results, indent=2))
        return

    print(f" ✓ Found {len(web_results)} web results")

    # Step 3: Check and optionally store new findings
    new_stored = 0

    for web_result in web_results:
        url = web_result.get("url", "")
        title = web_result.get("title", "Untitled")
        # NOTE(review): snippet is assigned but never used below — candidate
        # fallback when fetch_and_extract fails; confirm intent before removing.
        snippet = web_result.get("content", "")

        print(f"\n📄 Checking: {title}")
        print(f" URL: {url}")

        # Fetch full content
        fetched_title, content = fetch_and_extract(url)
        if not content:
            print(f" ⚠️ Could not fetch content")
            continue

        # Prefer the page's own <title> over the search-engine title.
        title = fetched_title or title

        # Check if substantial
        if not is_substantial(content):
            print(f" ⏭️ Content too short ({len(content)} chars), skipping")
            continue

        # Check if unique
        if not is_unique_content(content, kb_results):
            print(f" ⏭️ Similar content already in KB")
            continue

        print(f" ✓ New substantial content ({len(content)} chars)")

        # Auto-store or suggest
        if args.store_new:
            domain = suggest_domain(args.query, title, content)
            # Subjects = query words longer than 3 chars.
            subjects = [s.strip() for s in args.query.lower().split() if len(s) > 3]

            # NOTE(review): the raw-string regex inside this f-string
            # expression requires Python 3.12+ (PEP 701); earlier versions
            # reject backslashes in f-string expressions — confirm target.
            metadata = {
                "domain": domain,
                "path": f"{domain}/Web/{re.sub(r'[^\w\s-]', '', title)[:30]}",
                "subjects": subjects,
                "category": "reference",
                "content_type": "web_page",
                "title": title,
                "source_url": url,
                "date_added": datetime.now().strftime("%Y-%m-%d")
            }

            if store_in_kb(content, metadata):
                print(f" ✅ Stored in KB (domain: {domain})")
                new_stored += 1
            else:
                print(f" ❌ Failed to store")
        else:
            print(f" 💡 Use --store-new to save this")

    results["stored_count"] = new_stored

    # Summary
    print(f"\n📊 Summary:")
    print(f" KB results: {len(kb_results)}")
    print(f" Web results checked: {len(web_results)}")
    print(f" New items stored: {new_stored}")

    if args.json:
        print(json.dumps(results, indent=2))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
159
skills/qdrant-memory/scripts/store_memory.py
Executable file
159
skills/qdrant-memory/scripts/store_memory.py
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced memory storage with metadata support
|
||||
Usage: store_memory.py "Memory text" [--tags tag1,tag2] [--importance medium]
|
||||
[--confidence high] [--source user|inferred|external]
|
||||
[--verified] [--expires 2026-03-01] [--related id1,id2]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
import uuid
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
QDRANT_URL = "http://10.0.0.40:6333"
|
||||
COLLECTION_NAME = "kimi_memories"
|
||||
OLLAMA_URL = "http://10.0.0.10:11434/v1"
|
||||
|
||||
def get_embedding(text):
    """Generate an embedding using snowflake-arctic-embed2 via Ollama.

    Uses the OpenAI-compatible /embeddings endpoint; returns the
    embedding list, or None when the request fails.
    """
    body = json.dumps({
        "model": "snowflake-arctic-embed2",
        "input": text[:8192],  # stay within the model's input limit
    }).encode()

    request = urllib.request.Request(
        f"{OLLAMA_URL}/embeddings",
        data=body,
        headers={"Content-Type": "application/json"},
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            parsed = json.loads(resp.read().decode())
            return parsed["data"][0]["embedding"]
    except Exception as e:
        print(f"Error generating embedding: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def store_memory(text, embedding, tags=None, importance="medium", date=None,
                 source="conversation", confidence="high", source_type="user",
                 verified=True, expires_at=None, related_memories=None):
    """Upsert one memory point into Qdrant with enhanced metadata.

    Returns the new point's UUID string on success, None on failure.
    """
    if date is None:
        date = datetime.now().strftime("%Y-%m-%d")

    # Generate a UUID for the point ID
    point_id = str(uuid.uuid4())

    # Build payload with all metadata
    payload = {
        "text": text,
        "date": date,
        "tags": tags or [],
        "importance": importance,
        "source": source,
        "confidence": confidence,    # high/medium/low
        "source_type": source_type,  # user/inferred/external
        "verified": verified,        # bool
        "created_at": datetime.now().isoformat(),
        "access_count": 0,
        "last_accessed": datetime.now().isoformat()
    }

    # Optional metadata
    if expires_at:
        payload["expires_at"] = expires_at
    if related_memories:
        payload["related_memories"] = related_memories

    # Qdrant upsert format: one point per request, ?wait=true for durability.
    body = json.dumps({
        "points": [{
            "id": point_id,
            "vector": embedding,
            "payload": payload,
        }]
    }).encode()

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION_NAME}/points?wait=true",
        data=body,
        headers={"Content-Type": "application/json"},
        method="PUT",
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            reply = json.loads(resp.read().decode())
            if reply.get("status") == "ok":
                return point_id
            print(f"Qdrant response: {reply}", file=sys.stderr)
            return None
    except urllib.error.HTTPError as e:
        error_body = e.read().decode()
        print(f"HTTP Error {e.code}: {error_body}", file=sys.stderr)
        return None
    except Exception as e:
        print(f"Error storing memory: {e}", file=sys.stderr)
        return None
|
||||
|
||||
def link_memories(point_id, related_ids):
    """Link this memory to related memories (bidirectional).

    Placeholder: a complete implementation would add *related_ids* to this
    point's "related_memories" payload and add *point_id* to each related
    point's payload in turn. Currently a no-op.
    """
    # Update this memory to include related
    # Then update each related memory to include this one
    pass  # Implementation would update existing points
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Store a memory in Qdrant with metadata")
|
||||
parser.add_argument("text", help="Memory text to store")
|
||||
parser.add_argument("--tags", help="Comma-separated tags")
|
||||
parser.add_argument("--importance", default="medium", choices=["low", "medium", "high"])
|
||||
parser.add_argument("--date", help="Date in YYYY-MM-DD format")
|
||||
parser.add_argument("--source", default="conversation", help="Source of the memory")
|
||||
parser.add_argument("--confidence", default="high", choices=["high", "medium", "low"],
|
||||
help="Confidence in this memory's accuracy")
|
||||
parser.add_argument("--source-type", default="user", choices=["user", "inferred", "external"],
|
||||
help="How this memory was obtained")
|
||||
parser.add_argument("--verified", action="store_true", default=True,
|
||||
help="Whether this memory has been verified")
|
||||
parser.add_argument("--expires", help="Expiration date YYYY-MM-DD (for temporary memories)")
|
||||
parser.add_argument("--related", help="Comma-separated related memory IDs")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse tags and related memories
|
||||
tags = [t.strip() for t in args.tags.split(",")] if args.tags else []
|
||||
related = [r.strip() for r in args.related.split(",")] if args.related else None
|
||||
|
||||
print(f"Generating embedding...")
|
||||
embedding = get_embedding(args.text)
|
||||
|
||||
if embedding is None:
|
||||
print("❌ Failed to generate embedding", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print(f"Storing memory (vector dim: {len(embedding)})...")
|
||||
point_id = store_memory(
|
||||
args.text, embedding, tags, args.importance, args.date, args.source,
|
||||
args.confidence, args.source_type, args.verified, args.expires, related
|
||||
)
|
||||
|
||||
if point_id:
|
||||
print(f"✅ Memory stored successfully")
|
||||
print(f" ID: {point_id}")
|
||||
print(f" Tags: {tags}")
|
||||
print(f" Importance: {args.importance}")
|
||||
print(f" Confidence: {args.confidence}")
|
||||
print(f" Source: {args.source_type}")
|
||||
if args.expires:
|
||||
print(f" Expires: {args.expires}")
|
||||
else:
|
||||
print(f"❌ Failed to store memory", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
Reference in New Issue
Block a user