Files
vera-ai-v2/app/main.py
Vera-AI abfcc91eb3 v2.0.3: Improve error handling, add tests, cleanup
- Fix bare except clauses in curator.py and main.py
- Change embedding model to snowflake-arctic-embed2
- Increase semantic_score_threshold to 0.6
- Add memory context explanation to systemprompt.md
- Add pytest dependencies to requirements.txt
- Remove unused context_handler.py and .env.example
- Add project documentation (CLAUDE.md) and test files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 08:47:56 -05:00

136 lines
4.1 KiB
Python

# app/main.py
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
from contextlib import asynccontextmanager
import httpx
import logging
from datetime import datetime
from .config import config
from .singleton import get_qdrant_service
from .proxy_handler import handle_chat, forward_to_ollama, handle_chat_non_streaming
from .curator import Curator
from apscheduler.schedulers.asyncio import AsyncIOScheduler
# Configure root logging once at import time; everything logs at INFO and above.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Module-level scheduler; it is started and shut down inside lifespan().
scheduler = AsyncIOScheduler()
# Curator instance shared by the scheduled job and the manual trigger endpoint.
# Stays None until lifespan() assigns it during application startup.
curator = None
async def run_curator():
    """Run one memory-curation pass; used as the scheduled daily job.

    Runs every day at the configured time. The curator itself detects
    if it's day 01 (monthly mode) and processes all memories.
    Otherwise it processes the recent 24h only.

    Failures are logged (with traceback) and never propagated, so callers
    always get a normal return. If the module-level ``curator`` has not been
    initialized yet, the run is skipped with an explicit error message
    instead of an opaque ``AttributeError``.
    """
    if curator is None:
        logger.error("Memory curation skipped: curator is not initialized")
        return

    logger.info("Starting memory curation...")
    try:
        await curator.run()
    except Exception:
        # logger.exception records the full traceback, not just str(e).
        logger.exception("Memory curation failed")
    else:
        logger.info("Memory curation completed successfully")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan - startup and shutdown.

    Startup: ensures the Qdrant collection exists, builds the module-level
    ``curator`` and schedules the daily curation job at ``config.run_time``
    (``"HH:MM"``).

    Shutdown: stops the scheduler (if it was started) and closes the Qdrant
    service. Cleanup runs in a ``finally`` block so it also happens when
    startup fails part-way or an exception is thrown into the context.

    Raises:
        ValueError: if ``config.run_time`` is not of the form ``"HH:MM"``.
    """
    global curator
    logger.info("Starting Vera-AI...")

    # Initialize singleton QdrantService
    qdrant_service = get_qdrant_service()
    try:
        await qdrant_service._ensure_collection()

        # Initialize curator with singleton
        curator = Curator(
            qdrant_service=qdrant_service,
            model=config.curator_model,
            ollama_host=config.ollama_host,
        )

        # Schedule daily curator
        # Note: Monthly mode is detected automatically by curator_prompt.md (day 01)
        hour, minute = map(int, config.run_time.split(":"))
        scheduler.add_job(
            run_curator,
            "cron",
            hour=hour,
            minute=minute,
            id="daily_curator",
            # Re-entering the lifespan must not fail on a duplicate job id.
            replace_existing=True,
        )
        logger.info(f"Daily curator scheduled at {config.run_time}")
        scheduler.start()

        yield
    finally:
        logger.info("Shutting down Vera-AI...")
        # shutdown() raises if the scheduler never started (startup failure).
        if scheduler.running:
            scheduler.shutdown()
        await qdrant_service.close()
# ASGI application object; startup/shutdown work is delegated to lifespan().
# NOTE(review): the version string is hard-coded here - confirm it tracks the release version.
app = FastAPI(title="Vera-AI", version="2.0.0", lifespan=lifespan)
@app.get("/")
async def health_check():
    """Report service liveness together with Ollama reachability.

    Probes ``{config.ollama_host}/api/tags`` with a 5 second timeout. Any
    error during the probe is logged as a warning and reported as
    ``"unreachable"``; the endpoint itself always answers ``"status": "ok"``.
    """
    reachable = False
    try:
        async with httpx.AsyncClient(timeout=5.0) as http:
            tags_response = await http.get(f"{config.ollama_host}/api/tags")
            reachable = tags_response.status_code == 200
    except Exception:
        logger.warning(f"Failed to reach Ollama at {config.ollama_host}")

    return {
        "status": "ok",
        "ollama": "reachable" if reachable else "unreachable",
    }
@app.get("/api/tags")
async def api_tags():
    """Proxy to Ollama /api/tags with cloud model injection.

    Fetches the tag list from Ollama and, when cloud models are enabled and
    configured, appends one synthetic entry per cloud model name.

    Returns:
        JSONResponse carrying the (possibly augmented) upstream payload.
    """
    async with httpx.AsyncClient() as client:
        resp = await client.get(f"{config.ollama_host}/api/tags")
        data = resp.json()

    # Only inject into an object payload; anything else is passed through as-is.
    if config.cloud.enabled and config.cloud.models and isinstance(data, dict):
        cloud_entries = [
            {
                "name": name,
                "modified_at": "2026-03-25T00:00:00Z",
                "size": 0,
                "digest": "cloud",
                "details": {"family": "cloud"},
            }
            for name in config.cloud.models
        ]
        # Tolerate a missing or null "models" key (e.g. an upstream error body)
        # instead of raising KeyError/TypeError.
        data["models"] = list(data.get("models") or []) + cloud_entries

    return JSONResponse(content=data)
# Upstream headers that must not be replayed on the proxied response:
# hop-by-hop headers describe the upstream connection only, and
# content-length / content-encoding describe the raw upstream body, whereas
# aiter_bytes() yields already-decoded bytes that are re-streamed here.
_NON_FORWARDABLE_HEADERS = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
    "content-length",
    "content-encoding",
})


def _forwardable_headers(headers):
    """Return the upstream headers that are safe to copy onto the response."""
    return {
        key: value
        for key, value in headers.items()
        if key.lower() not in _NON_FORWARDABLE_HEADERS
    }


@app.api_route("/api/{path:path}", methods=["GET", "POST", "DELETE"])
async def proxy_all(request: Request, path: str):
    """Route ``/api/*`` requests: handle chat locally, forward everything else.

    ``/api/chat`` is dispatched to the streaming or non-streaming chat handler
    depending on the ``stream`` field of the JSON body (default: streaming).
    A body that is not a JSON object yields a 400 response instead of an
    unhandled server error. Any other path is forwarded to Ollama and the
    upstream response is streamed back with its status code and its
    forwardable headers.
    """
    if path == "chat":
        try:
            body = await request.json()
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return JSONResponse(
                status_code=400, content={"error": "invalid JSON body"}
            )
        if not isinstance(body, dict):
            return JSONResponse(
                status_code=400, content={"error": "JSON body must be an object"}
            )

        if body.get("stream", True):
            return await handle_chat(request)
        return await handle_chat_non_streaming(body)

    resp = await forward_to_ollama(request, f"/api/{path}")
    return StreamingResponse(
        resp.aiter_bytes(),
        status_code=resp.status_code,
        headers=_forwardable_headers(resp.headers),
        media_type=resp.headers.get("content-type"),
    )
@app.post("/curator/run")
async def trigger_curator():
    """Manually trigger curator.

    The curator will automatically detect if it's day 01 (monthly mode)
    and process all memories. Otherwise processes recent 24h.

    Returns:
        ``{"status": "curation completed"}`` on success.
        HTTP 503 if the curator has not been initialized yet.
        HTTP 500 if the curation run raised; the error is logged with its
        traceback rather than being reported as a success.
    """
    if curator is None:
        return JSONResponse(
            status_code=503, content={"status": "curator not initialized"}
        )

    logger.info("Starting memory curation...")
    try:
        await curator.run()
    except Exception:
        logger.exception("Memory curation failed")
        return JSONResponse(status_code=500, content={"status": "curation failed"})

    logger.info("Memory curation completed successfully")
    return {"status": "curation completed"}