#!/usr/bin/env python3
"""
Monitor the OpenClaw GitHub repo for relevant updates.

Only outputs/announces when there are significant changes affecting our setup.
Always exits with code 0 to prevent "exec failed" logs.

Usage: monitor_openclaw_repo.py [--json]
"""

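# Example cron entry (hypothetical schedule and paths; adjust to your setup):
#   0 */6 * * * /path/to/monitor_openclaw_repo.py --json >> /var/log/openclaw_monitor.log 2>&1
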
import argparse
import sys
import json
import urllib.request
import re
import hashlib
from datetime import datetime

QDRANT_URL = "http://10.0.0.40:6333"
KB_COLLECTION = "knowledge_base"

# Keywords that indicate relevance to our setup
RELEVANT_KEYWORDS = [
    "ollama", "model", "embedding", "llm", "ai",
    "telegram", "webchat", "signal", "discord",
    "skill", "skills", "qdrant", "memory", "search",
    "whisper", "tts", "voice", "cron",
    "gateway", "agent", "session", "vector",
    "browser", "exec", "read", "edit", "write",
    "breaking", "deprecated", "removed", "changed",
    "fix", "bug", "patch", "security", "vulnerability"
]

# Areas that get double weight when scoring relevance (see is_relevant_change)
HIGH_PRIORITY_AREAS = [
    "ollama", "telegram", "qdrant", "memory", "skills",
    "voice", "cron", "gateway", "browser"
]

def fetch_github_api(url):
    """Fetch JSON from the GitHub REST API; return None on any failure."""
    headers = {
        'User-Agent': 'OpenClaw-KB-Monitor',
        'Accept': 'application/vnd.github.v3+json'
    }
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=20) as response:
            return json.loads(response.read().decode())
    except Exception:
        return None

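# Example (endpoint taken from scrape_all_sections below):
#   releases = fetch_github_api("https://api.github.com/repos/openclaw/openclaw/releases?per_page=5")
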
def fetch_github_html(url):
    """Fetch a page and reduce it to plain text (first 5000 chars); None on failure."""
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=20) as response:
            html = response.read().decode('utf-8', errors='ignore')
            # Drop scripts and styles, then all remaining tags, then collapse whitespace
            text = re.sub(r'<script[^>]*>.*?</script>', ' ', html, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<style[^>]*>.*?</style>', ' ', text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r'<[^>]+>', ' ', text)
            text = re.sub(r'\s+', ' ', text).strip()
            return text[:5000]
    except Exception:
        return None

def get_embedding(text):
    """Embed text via the local Ollama /api/embed endpoint; None on failure."""
    data = {"model": "nomic-embed-text", "input": text[:1000]}
    req = urllib.request.Request(
        "http://localhost:11434/api/embed",
        data=json.dumps(data).encode(),
        headers={"Content-Type": "application/json"},
        method="POST"
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            result = json.loads(response.read().decode())
            # /api/embed returns {"embeddings": [[...]]}; take the first vector
            return result.get("embeddings", [None])[0]
    except Exception:
        return None

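# Assumes an Ollama instance on localhost:11434 with the nomic-embed-text model
# already pulled (`ollama pull nomic-embed-text`); adjust the URL if it runs elsewhere.
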
def search_kb_by_path(path_prefix):
    """Scroll the KB collection and return points whose payload path starts with path_prefix."""
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/scroll"
    # NOTE: only the first 100 points are scanned; fine for a small collection
    data = {"limit": 100, "with_payload": True}
    req = urllib.request.Request(url, data=json.dumps(data).encode(),
                                 headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            points = result.get("result", {}).get("points", [])
            return [p for p in points if p.get("payload", {}).get("path", "").startswith(path_prefix)]
    except Exception:
        return []

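# If the collection ever outgrows 100 points, Qdrant's scroll API supports paging:
# each response carries a next_page_offset you can pass back as "offset" in the
# next request. Not needed here yet.
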
def store_in_kb(text, metadata):
    """Embed text and upsert it into Qdrant with the given payload metadata."""
    import uuid
    embedding = get_embedding(text)
    if not embedding:
        return None
    metadata["checksum"] = f"sha256:{hashlib.sha256(text.encode()).hexdigest()[:16]}"
    metadata["date_scraped"] = datetime.now().isoformat()
    metadata["text_preview"] = text[:300] + "..." if len(text) > 300 else text
    point = {"id": str(uuid.uuid4()), "vector": embedding, "payload": metadata}
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points"
    req = urllib.request.Request(url, data=json.dumps({"points": [point]}).encode(),
                                 headers={"Content-Type": "application/json"}, method="PUT")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            return result.get("status") == "ok"
    except Exception:
        return False

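# Qdrant's REST reply wraps results as {"status": "ok", "result": {...}, "time": ...},
# hence the top-level status check above (and in delete_kb_entry below).
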
def delete_kb_entry(entry_id):
    """Delete a single point from the KB collection by id."""
    url = f"{QDRANT_URL}/collections/{KB_COLLECTION}/points/delete"
    data = {"points": [entry_id]}
    req = urllib.request.Request(url, data=json.dumps(data).encode(),
                                 headers={"Content-Type": "application/json"}, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=10) as response:
            result = json.loads(response.read().decode())
            return result.get("status") == "ok"
    except Exception:
        return False

def is_relevant_change(text):
    """Score text against our keyword lists; high-priority areas count double."""
    text_lower = text.lower()
    # Plain substring match, so short keywords like "ai" can fire inside longer words
    found_keywords = [kw for kw in RELEVANT_KEYWORDS if kw in text_lower]
    high_priority_found = [area for area in HIGH_PRIORITY_AREAS if area in text_lower]
    return {
        "relevant": len(found_keywords) > 0,
        "keywords": found_keywords,
        "high_priority": high_priority_found,
        "score": len(found_keywords) + (len(high_priority_found) * 2)
    }

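# Example: "Fix ollama model reload" matches keywords ["ollama", "model", "fix"]
# plus the high-priority area ["ollama"], so score = 3 + 1*2 = 5.
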
def evaluate_significance(changes):
    """Decide whether a batch of changes is worth announcing."""
    total_score = sum(c["analysis"]["score"] for c in changes)
    high_priority_count = sum(len(c["analysis"]["high_priority"]) for c in changes)
    return {
        # Any high-priority hit, or a combined score of 3+, counts as significant
        "significant": total_score >= 3 or high_priority_count > 0,
        "total_score": total_score,
        "high_priority_count": high_priority_count
    }

def format_summary(changes, significance):
    """Render detected changes as a short, emoji-tagged text summary."""
    lines = ["📊 OpenClaw Repo Update", f"📅 {datetime.now().strftime('%Y-%m-%d')}", ""]
    by_section = {}
    for change in changes:
        by_section.setdefault(change["section"], []).append(change)

    for section, items in by_section.items():
        lines.append(f"📁 {section}")
        for item in items[:3]:
            title = item["title"][:50] + "..." if len(item["title"]) > 50 else item["title"]
            lines.append(f"  • {title}")
            if item["analysis"]["high_priority"]:
                lines.append(f"    ⚠️ Affects: {', '.join(item['analysis']['high_priority'][:2])}")
        if len(items) > 3:
            lines.append(f"  ... and {len(items) - 3} more")
        lines.append("")
    return "\n".join(lines)

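# Example output (values illustrative):
#   📊 OpenClaw Repo Update
#   📅 2025-01-01
#
#   📁 Release
#     • v1.2.3
#       ⚠️ Affects: ollama, telegram
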
def scrape_all_sections():
    """Collect the repo README, latest releases, and open issues as sections."""
    sections = []
    main_text = fetch_github_html("https://github.com/openclaw/openclaw")
    if main_text:
        sections.append({"section": "Main Repo", "title": "openclaw/openclaw README",
                         "url": "https://github.com/openclaw/openclaw", "content": main_text})

    releases = fetch_github_api("https://api.github.com/repos/openclaw/openclaw/releases?per_page=5")
    if releases:
        for release in releases:
            # "name" and "body" can be null in the API response, so guard with `or`
            sections.append({"section": "Release", "title": release.get("name") or release.get("tag_name", "Unknown"),
                             "url": release.get("html_url", ""), "content": (release.get("body") or "")[:2000],
                             "published": release.get("published_at", "")})

    issues = fetch_github_api("https://api.github.com/repos/openclaw/openclaw/issues?state=open&per_page=5")
    if issues:
        for issue in issues:
            # The issues endpoint also returns PRs; skip anything with a pull_request key
            if "pull_request" not in issue:
                sections.append({"section": "Issue", "title": issue.get("title", "Unknown"),
                                 "url": issue.get("html_url", ""), "content": issue.get("body", "")[:1500] if issue.get("body") else "No description",
                                 "labels": [l.get("name", "") for l in issue.get("labels", [])]})
    return sections

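# Note: unauthenticated GitHub API calls are rate-limited (currently 60/hour per IP),
# which is plenty for the handful of requests each cron run makes.
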
def check_and_update():
    """Scrape all sections, store anything new in the KB, and report significant changes."""
    sections = scrape_all_sections()
    if not sections:
        return None, "No data scraped"

    existing_entries = search_kb_by_path("OpenClaw/GitHub")
    existing_checksums = {e.get("payload", {}).get("checksum", ""): e for e in existing_entries}
    changes_detected = []

    for section in sections:
        content = section["content"]
        if not content:
            continue
        checksum = f"sha256:{hashlib.sha256(content.encode()).hexdigest()[:16]}"
        if checksum in existing_checksums:
            continue  # unchanged since the last run

        analysis = is_relevant_change(content + " " + section["title"])
        section["analysis"] = analysis
        section["checksum"] = checksum
        changes_detected.append(section)

        # Replace the stale KB entry (matched by title) before storing the new one
        for old_entry in existing_checksums.values():
            if old_entry.get("payload", {}).get("title", "") == section["title"]:
                delete_kb_entry(old_entry.get("id"))
                break

        metadata = {
            "domain": "OpenClaw", "path": f"OpenClaw/GitHub/{section['section']}/{section['title'][:30]}",
            "subjects": ["openclaw", "github", section['section'].lower()], "category": "reference",
            "content_type": "web_page", "title": section["title"], "source_url": section["url"],
            "date_added": datetime.now().strftime("%Y-%m-%d")
        }
        store_in_kb(content, metadata)

    if changes_detected:
        significance = evaluate_significance(changes_detected)
        if significance["significant"]:
            return {"changes": changes_detected, "significance": significance,
                    "summary": format_summary(changes_detected, significance)}, None
        else:
            return None, "Changes not significant"
    return None, "No changes detected"

def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--json", action="store_true", help="emit machine-readable JSON")
    args = parser.parse_args()

    result, reason = check_and_update()

    # Always output JSON for cron compatibility, even if empty
    if args.json:
        print(json.dumps(result if result else {}))
    elif result:
        print(result["summary"])
    # If no result, output nothing (silent)

    # Always exit 0 to prevent "exec failed" logs
    sys.exit(0)

if __name__ == "__main__":
    main()