107 lines
3.0 KiB
Python
107 lines
3.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Debug curator with real data"""
|
|
import json
|
|
import requests
|
|
import urllib.request
|
|
|
|
QDRANT_URL = "http://10.0.0.40:6333"
SOURCE_COLLECTION = "kimi_memories"

# Fixed inputs for this debug run. They are module constants (and function
# defaults) so another user/date/model can be tried without editing logic.
DEBUG_USER_ID = "rob"
DEBUG_DATE = "2026-02-23"
CURATOR_URL = 'http://10.0.0.10:11434/api/generate'
CURATOR_MODEL = 'qwen3:4b-instruct'
CURATOR_PROMPT_PATH = '/root/.openclaw/workspace/.projects/true-recall-v1/curator-prompt.md'
MAX_MESSAGE_CHARS = 300  # each message is truncated to this many characters


def fetch_points(user_id: str = DEBUG_USER_ID, date: str = DEBUG_DATE, limit: int = 5) -> list:
    """Fetch up to *limit* points for one user and date from the source collection.

    POSTs a filtered request to the collection's ``points/scroll`` endpoint
    under ``QDRANT_URL`` and returns the ``result.points`` list of the reply
    (an empty list when that part of the reply is missing or null).

    Raises:
        urllib.error.URLError: if the request fails or times out (30 s).
        json.JSONDecodeError: if the reply body is not valid JSON.
    """
    filter_data = {
        "must": [
            {"key": "user_id", "match": {"value": user_id}},
            {"key": "date", "match": {"value": date}},
        ]
    }
    body = {"limit": limit, "with_payload": True, "filter": filter_data}
    req = urllib.request.Request(
        f"{QDRANT_URL}/collections/{SOURCE_COLLECTION}/points/scroll",
        data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=30) as response:
        result = json.loads(response.read().decode())
    # `or` guards against explicit nulls, which .get(key, default) lets through.
    return (result.get("result") or {}).get("points") or []


def build_turns(points: list, default_user_id: str = DEBUG_USER_ID,
                default_date: str = DEBUG_DATE) -> list:
    """Convert raw points into truncated turn dicts, sorted by conversation and turn.

    Points whose payload has neither a user message nor an AI response are
    skipped. A null payload or null message is treated as empty instead of
    raising ``TypeError`` when it is sliced.
    """
    turns = []
    for point in points:
        payload = point.get("payload") or {}
        user_msg = payload.get("user_message") or ""
        ai_msg = payload.get("ai_response") or ""
        if not (user_msg or ai_msg):
            continue
        turns.append({
            "turn": payload.get("turn_number", 0),
            "user_id": payload.get("user_id", default_user_id),
            "user": user_msg[:MAX_MESSAGE_CHARS],  # Truncate
            "ai": ai_msg[:MAX_MESSAGE_CHARS],  # Truncate
            "conversation_id": payload.get("conversation_id", ""),
            "timestamp": payload.get("created_at", ""),
            "date": payload.get("date", default_date),
        })
    turns.sort(key=lambda t: (t["conversation_id"], t["turn"]))
    return turns


def build_prompt(turns: list, max_turns: int = 5) -> str:
    """Return the user prompt: the first *max_turns* turns as a fenced JSON block."""
    conversation_json = json.dumps(turns[:max_turns], indent=2)
    return (
        "## Input Conversation\n\n"
        f"```json\n{conversation_json}\n```\n\n"
        "## Output\n"
    )


def run_curator(turns: list, system_prompt: str) -> str:
    """Send the turns to the curator model and return its stripped text output.

    Uses the file-level ``requests`` import. The reply's ``response`` field is
    returned; a missing or null field yields an empty string.

    Raises:
        requests.HTTPError: if the server answers with an error status, so a
            failed call is not mistaken for an empty model output.
    """
    response = requests.post(
        CURATOR_URL,
        json={
            'model': CURATOR_MODEL,
            'system': system_prompt,
            'prompt': build_prompt(turns),
            'stream': False,
            'options': {'temperature': 0.1, 'num_predict': 3000},
        },
        timeout=120,
    )
    response.raise_for_status()
    return (response.json().get('response') or '').strip()


def _fenced_payload(output: str) -> str:
    """Return the text of the first ```json fence, else the first fence, else all of it."""
    if '```json' in output:
        return output.split('```json', 1)[1].split('```', 1)[0].strip()
    if '```' in output:
        return output.split('```', 2)[1].strip()
    return output.strip()


def _bracket_span(text: str):
    """Return the span from the first '[' or '{' through the last ']' or '}', or None."""
    openers = [i for i in (text.find("["), text.find("{")) if i != -1]
    closer = max(text.rfind("]"), text.rfind("}"))
    if not openers or closer < min(openers):
        return None
    return text[min(openers):closer + 1]


def parse_gems(output: str):
    """Decode the curator output, honoring a Markdown code fence if present.

    Raises:
        json.JSONDecodeError: (a ``ValueError``) if the selected text is not JSON.
    """
    return json.loads(_fenced_payload(output))


def parse_gems_raw(output: str):
    """Fallback decode: ignore fences and prose, parse the outermost bracketed span.

    Unlike retrying ``json.loads`` on the same text, this can succeed when the
    model wraps the JSON in commentary or uses an unexpected fence tag.

    Raises:
        ValueError: if no bracketed span exists or it is not valid JSON.
    """
    span = _bracket_span(output)
    if span is None:
        raise ValueError("no JSON array or object found in curator output")
    return json.loads(span)


def main() -> None:
    """Fetch sample turns, run them through the curator, and report the parse result."""
    # Get sample turns from real data
    turns = build_turns(fetch_points())

    print(f"Got {len(turns)} turns")
    print("Sample:")
    for t in turns[:2]:
        print(f" User: {t['user'][:100]}...")
        print(f" AI: {t['ai'][:100]}...")

    # Now test with curator
    with open(CURATOR_PROMPT_PATH, encoding="utf-8") as f:
        prompt = f.read()

    output = run_curator(turns, prompt)

    print("\n=== CURATOR OUTPUT ===")
    print(output[:3000])
    print("\n=== TRYING TO PARSE ===")

    # Try to parse
    try:
        gems = parse_gems(output)
    except ValueError as e:
        print(f"Parse error: {e}")
        print("Trying raw parse...")
        try:
            gems = parse_gems_raw(output)
        except ValueError as raw_error:
            print(f"Parse error: {raw_error}")
            # Keep a non-zero exit status, as the original's uncaught error did.
            raise SystemExit(1)
    print(f"Parsed {len(gems)} gems")


if __name__ == "__main__":
    main()