jarvis-memory/skills/qdrant-memory/scripts/monitor_openclaw_repo.py

#!/usr/bin/env python3
"""
Monitor OpenClaw GitHub repo for relevant updates
Only outputs/announces when there are significant changes affecting our setup.
Always exits with code 0 to prevent "exec failed" logs.
Usage: monitor_openclaw_repo.py [--json]
"""
import argparse
import sys
import json
import urllib.request
import re
import hashlib
import uuid
from datetime import datetime
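# Service endpoints for this setup: Qdrant on the LAN, Ollama on localhost (see get_embedding)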
QDRANT_URL = "http://10.0.0.40:6333"
KB_COLLECTION = "knowledge_base"
# Keywords that indicate relevance to our setup
RELEVANT_KEYWORDS = [
"ollama", "model", "embedding", "llm", "ai",
"telegram", "webchat", "signal", "discord",
"skill", "skills", "qdrant", "memory", "search",
"whisper", "tts", "voice", "cron",
"gateway", "agent", "session", "vector",
"browser", "exec", "read", "edit", "write",
"breaking", "deprecated", "removed", "changed",
"fix", "bug", "patch", "security", "vulnerability"
]
HIGH_PRIORITY_AREAS = [
"ollama", "telegram", "qdrant", "memory", "skills",
"voice", "cron", "gateway", "browser"
]
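# Fetch JSON from the GitHub REST API; returns None on any network or parse error
# so callers can degrade gracefully.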
def fetch_github_api(url):
headers = {
'User-Agent': 'OpenClaw-KB-Monitor',
'Accept': 'application/vnd.github.v3+json'
}
req = urllib.request.Request(url, headers=headers)
try:
with urllib.request.urlopen(req, timeout=20) as response:
return json.loads(response.read().decode())
    except Exception:
        return None
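# Fetch a GitHub page and reduce it to plain text by stripping scripts, styles,
# and tags; output is capped at 5000 characters.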
def fetch_github_html(url):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
req = urllib.request.Request(url, headers=headers)
try:
with urllib.request.urlopen(req, timeout=20) as response:
html = response.read().decode('utf-8', errors='ignore')
text = re.sub(r'<script[^>]*>.*?</script>', ' ', html, flags=re.DOTALL | re.IGNORECASE)
text = re.sub(r'<style[^>]*>.*?</style>', ' ', text, flags=re.DOTALL | re.IGNORECASE)
text = re.sub(r'<[^>]+>', ' ', text)
text = re.sub(r'\s+', ' ', text).strip()
return text[:5000]
    except Exception:
        return None
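# Embed text with the local Ollama /api/embed endpoint (nomic-embed-text).
# Input is capped at 1000 characters; returns None if the service is unreachable.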
def get_embedding(text):
    data = {"model": "nomic-embed-text", "input": text[:1000]}
    req = urllib.request.Request(
        "http://localhost:11434/api/embed",
        data=json.dumps(data).encode(),
        headers={"Content-Type": "application/json"},
        method="POST"
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
        # Guard against an empty embeddings list as well as a missing key
        embeddings = result.get("embeddings") or [None]
        return embeddings[0]
    except Exception:
        return None
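# Scroll the Qdrant collection and keep points whose payload "path" starts with
# the given prefix; prefix filtering is done client-side.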
def search_kb_by_path(path_prefix):
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/scroll"
    matches = []
    offset = None
    while True:
        data = {"limit": 100, "with_payload": True}
        if offset is not None:
            data["offset"] = offset
        req = urllib.request.Request(url, data=json.dumps(data).encode(),
                                     headers={"Content-Type": "application/json"}, method="POST")
        try:
            with urllib.request.urlopen(req, timeout=10) as response:
                result = json.loads(response.read().decode())
        except Exception:
            return matches
        points = result.get("result", {}).get("points", [])
        matches.extend(p for p in points
                       if p.get("payload", {}).get("path", "").startswith(path_prefix))
        # Follow the scroll cursor so collections larger than one page are fully covered
        offset = result.get("result", {}).get("next_page_offset")
        if offset is None:
            return matches
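# Embed a document and upsert it into the knowledge base with provenance metadata.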
def store_in_kb(text, metadata):
    embedding = get_embedding(text)
    if not embedding:
        return None
    # Truncated sha256 prefix: enough for change detection, not for integrity checks
    metadata["checksum"] = f"sha256:{hashlib.sha256(text.encode()).hexdigest()[:16]}"
    metadata["date_scraped"] = datetime.now().isoformat()
    metadata["text_preview"] = text[:300] + "..." if len(text) > 300 else text
    point = {"id": str(uuid.uuid4()), "vector": embedding, "payload": metadata}
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points"
    req = urllib.request.Request(url, data=json.dumps({"points": [point]}).encode(),
                                 headers={"Content-Type": "application/json"}, method="PUT")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
        return result.get("status") == "ok"
    except Exception:
        return False
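# Delete a single point from the knowledge base by id.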
def delete_kb_entry(entry_id):
url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/delete"
data = {"points": [entry_id]}
req = urllib.request.Request(url, data=json.dumps(data).encode(),
headers={"Content-Type": "application/json"}, method="POST")
try:
with urllib.request.urlopen(req, timeout=10) as response:
result = json.loads(response.read().decode())
return result.get("status") == "ok"
    except Exception:
        return False
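# Score text against the keyword lists: each relevant keyword adds 1, each
# high-priority area adds a further 2 (high-priority areas also appear in
# RELEVANT_KEYWORDS, so a single such hit scores 3).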
def is_relevant_change(text):
    text_lower = text.lower()
    # Whole-word matching so short keywords like "ai" don't fire on substrings ("email", "maintain")
    found_keywords = [kw for kw in RELEVANT_KEYWORDS
                      if re.search(rf"\b{re.escape(kw)}\b", text_lower)]
    high_priority_found = [area for area in HIGH_PRIORITY_AREAS
                           if re.search(rf"\b{re.escape(area)}\b", text_lower)]
    return {
        "relevant": len(found_keywords) > 0,
        "keywords": found_keywords,
        "high_priority": high_priority_found,
        "score": len(found_keywords) + (len(high_priority_found) * 2)
    }
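# A batch of changes is significant when the combined score reaches 3 or any
# high-priority area is touched; e.g. three plain keyword hits qualify on their own.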
def evaluate_significance(changes):
total_score = sum(c["analysis"]["score"] for c in changes)
high_priority_count = sum(len(c["analysis"]["high_priority"]) for c in changes)
return {
"significant": total_score >= 3 or high_priority_count > 0,
"total_score": total_score,
"high_priority_count": high_priority_count
}
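# Build the human-readable announcement, grouped by section and capped at three
# items per section.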
def format_summary(changes, significance):
lines = ["📊 OpenClaw Repo Update", f"📅 {datetime.now().strftime('%Y-%m-%d')}", ""]
by_section = {}
for change in changes:
section = change["section"]
if section not in by_section:
by_section[section] = []
by_section[section].append(change)
for section, items in by_section.items():
lines.append(f"📁 {section}")
        for item in items[:3]:
            title = item["title"][:50] + "..." if len(item["title"]) > 50 else item["title"]
            lines.append(f"  {title}")
if item["analysis"]["high_priority"]:
lines.append(f" ⚠️ Affects: {', '.join(item['analysis']['high_priority'][:2])}")
if len(items) > 3:
lines.append(f" ... and {len(items) - 3} more")
lines.append("")
return "\n".join(lines)
def scrape_all_sections():
sections = []
main_text = fetch_github_html("https://github.com/openclaw/openclaw")
if main_text:
sections.append({"section": "Main Repo", "title": "openclaw/openclaw README",
"url": "https://github.com/openclaw/openclaw", "content": main_text})
releases = fetch_github_api("https://api.github.com/repos/openclaw/openclaw/releases?per_page=5")
if releases:
for release in releases:
sections.append({"section": "Release", "title": release.get("name", release.get("tag_name", "Unknown")),
"url": release.get("html_url", ""), "content": release.get("body", "")[:2000],
"published": release.get("published_at", "")})
issues = fetch_github_api("https://api.github.com/repos/openclaw/openclaw/issues?state=open&per_page=5")
if issues:
for issue in issues:
if "pull_request" not in issue:
sections.append({"section": "Issue", "title": issue.get("title", "Unknown"),
"url": issue.get("html_url", ""), "content": issue.get("body", "")[:1500] if issue.get("body") else "No description",
"labels": [l.get("name", "") for l in issue.get("labels", [])]})
return sections
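# Main pipeline: scrape sections, diff against stored checksums, replace stale
# KB entries, and report only when the new material clears the significance bar.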
def check_and_update():
sections = scrape_all_sections()
if not sections:
return None, "No data scraped"
existing_entries = search_kb_by_path("OpenClaw/GitHub")
existing_checksums = {e.get("payload", {}).get("checksum", ""): e for e in existing_entries}
changes_detected = []
for section in sections:
content = section["content"]
if not content:
continue
checksum = f"sha256:{hashlib.sha256(content.encode()).hexdigest()[:16]}"
if checksum in existing_checksums:
continue
analysis = is_relevant_change(content + " " + section["title"])
section["analysis"] = analysis
section["checksum"] = checksum
changes_detected.append(section)
        # Replace any stale entry for the same title so the KB keeps one copy per item
        for old_entry in existing_checksums.values():
            if old_entry.get("payload", {}).get("title", "") == section["title"]:
                delete_kb_entry(old_entry.get("id"))
                break
metadata = {
"domain": "OpenClaw", "path": f"OpenClaw/GitHub/{section['section']}/{section['title'][:30]}",
"subjects": ["openclaw", "github", section['section'].lower()], "category": "reference",
"content_type": "web_page", "title": section["title"], "source_url": section["url"],
"date_added": datetime.now().strftime("%Y-%m-%d")
}
store_in_kb(content, metadata)
if changes_detected:
significance = evaluate_significance(changes_detected)
if significance["significant"]:
return {"changes": changes_detected, "significance": significance,
"summary": format_summary(changes_detected, significance)}, None
else:
return None, "Changes not significant"
return None, "No changes detected"
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--json", action="store_true")
args = parser.parse_args()
    result, _reason = check_and_update()
# Always output JSON for cron compatibility, even if empty
if args.json:
print(json.dumps(result if result else {}))
elif result:
print(result["summary"])
# If no result, output nothing (silent)
# Always exit 0 to prevent "exec failed" logs
sys.exit(0)
if __name__ == "__main__":
main()