Initial commit: Vera-AI v2 with async Qdrant, singleton pattern, monthly curation, and configurable UID/GID/TZ

Features:
- AsyncQdrantClient for non-blocking Qdrant operations
- Singleton pattern for QdrantService
- Monthly full curation (day 1 at 03:00)
- Configurable UID/GID for Docker
- Timezone support via TZ env var
- Configurable log directory (VERA_LOG_DIR)
- Volume mounts for config/, prompts/, logs/
- Standard Docker format with .env file

Fixes:
- Removed unused system_token_budget
- Added semantic_score_threshold config
- Fixed streaming response handling
- Python-based healthcheck (no curl dependency)
2026-03-26 12:37:25 -05:00
commit 50593e200d
21 changed files with 1916 additions and 0 deletions
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,156 @@
+# app/main.py
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse, JSONResponse
+from contextlib import asynccontextmanager
+import httpx
+import logging
+from datetime import datetime
+
+from .config import config
+from .singleton import get_qdrant_service
+from .proxy_handler import handle_chat, forward_to_ollama, handle_chat_non_streaming
+from .curator import Curator
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+
+# Configure root logging at INFO level and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Shared scheduler; lifespan() registers the curator cron jobs on it.
+scheduler = AsyncIOScheduler()
+# Assigned a Curator instance in lifespan(); stays None until startup runs.
+curator = None
+
+
+async def run_curator():
+    """Scheduled daily curator job (recent 24h).
+
+    Awaits ``curator.run_daily()`` on the module-level curator. Any
+    exception is logged and swallowed so a failed run does not propagate to
+    the caller (the scheduler job or the manual trigger endpoint).
+    """
+    logger.info("Starting daily memory curation...")
+    try:
+        await curator.run_daily()
+    except Exception as e:
+        # logger.exception records the traceback, which a plain
+        # logger.error call would discard.
+        logger.exception("Daily memory curation failed: %s", e)
+    else:
+        logger.info("Daily memory curation completed successfully")
+
+
+async def run_curator_full():
+    """Scheduled monthly curator job (full database).
+
+    Awaits ``curator.run_full()`` on the module-level curator. Any
+    exception is logged and swallowed so a failed run does not propagate to
+    the caller (the scheduler job or the manual trigger endpoint).
+    """
+    logger.info("Starting monthly full memory curation...")
+    try:
+        await curator.run_full()
+    except Exception as e:
+        # logger.exception records the traceback, which a plain
+        # logger.error call would discard.
+        logger.exception("Monthly full memory curation failed: %s", e)
+    else:
+        logger.info("Monthly full memory curation completed successfully")
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan - startup and shutdown.
+
+    Startup: obtains the singleton QdrantService and awaits its
+    ``_ensure_collection()``, builds the module-level ``curator``, registers
+    the daily and monthly cron jobs, and starts the scheduler.
+
+    Shutdown: stops the scheduler and closes the Qdrant service. The cleanup
+    sits in ``finally`` so it also runs when the lifespan context is exited
+    with an exception.
+    """
+    global curator
+
+    logger.info("Starting Vera-AI...")
+
+    # Initialize singleton QdrantService
+    qdrant_service = get_qdrant_service()
+    await qdrant_service._ensure_collection()
+
+    # Initialize curator with singleton
+    curator = Curator(
+        qdrant_service=qdrant_service,
+        model=config.curator_model,
+        ollama_host=config.ollama_host
+    )
+
+    # Schedule daily curator (recent 24h); run_time is parsed as "HH:MM".
+    hour, minute = map(int, config.run_time.split(":"))
+    scheduler.add_job(run_curator, "cron", hour=hour, minute=minute, id="daily_curator")
+    logger.info(f"Daily curator scheduled at {config.run_time}")
+
+    # Schedule monthly full curator (all raw memories)
+    full_hour, full_minute = map(int, config.full_run_time.split(":"))
+    scheduler.add_job(
+        run_curator_full,
+        "cron",
+        day=config.full_run_day,
+        hour=full_hour,
+        minute=full_minute,
+        id="monthly_curator"
+    )
+    logger.info(f"Monthly full curator scheduled on day {config.full_run_day} at {config.full_run_time}")
+
+    scheduler.start()
+
+    try:
+        yield
+    finally:
+        logger.info("Shutting down Vera-AI...")
+        try:
+            scheduler.shutdown()
+        finally:
+            # Close the Qdrant service even if stopping the scheduler fails.
+            await qdrant_service.close()
+
+
+# ASGI application object; startup and shutdown work is handled by lifespan().
+app = FastAPI(title="Vera-AI", version="2.0.0", lifespan=lifespan)
+
+
+@app.get("/")
+async def health_check():
+    """Health check endpoint.
+
+    Probes ``{config.ollama_host}/api/tags`` with a 5 second timeout.
+
+    Returns:
+        A dict whose ``status`` is always ``"ok"`` and whose ``ollama`` is
+        ``"reachable"`` when the probe answers with HTTP 200, otherwise
+        ``"unreachable"``.
+    """
+    ollama_status = "unreachable"
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            resp = await client.get(f"{config.ollama_host}/api/tags")
+    except Exception as exc:
+        # Best-effort probe: report "unreachable" instead of failing the
+        # health check, but record why. Catching Exception (not a bare
+        # except) lets task cancellation and interpreter shutdown propagate.
+        logger.warning("Ollama health probe failed: %s", exc)
+    else:
+        if resp.status_code == 200:
+            ollama_status = "reachable"
+    return {"status": "ok", "ollama": ollama_status}
+
+
+@app.get("/api/tags")
+async def api_tags():
+    """Proxy to Ollama /api/tags with cloud model injection.
+
+    Fetches the JSON payload from ``{config.ollama_host}/api/tags`` and, when
+    cloud models are enabled and configured, appends one placeholder entry
+    per configured cloud model name to its ``models`` list.
+
+    Returns:
+        A JSONResponse carrying the (possibly extended) payload.
+    """
+    async with httpx.AsyncClient() as client:
+        resp = await client.get(f"{config.ollama_host}/api/tags")
+        data = resp.json()
+
+    if config.cloud.enabled and config.cloud.models:
+        # Tolerate an upstream payload whose "models" entry is missing or
+        # null instead of failing on it.
+        models = data.get("models") or []
+        data["models"] = models
+        models.extend(
+            {
+                "name": name,
+                "modified_at": "2026-03-25T00:00:00Z",
+                "size": 0,
+                "digest": "cloud",
+                "details": {"family": "cloud"},
+            }
+            for name in config.cloud.models
+        )
+    return JSONResponse(content=data)
+
+
+@app.api_route("/api/{path:path}", methods=["GET", "POST", "DELETE"])
+async def proxy_all(request: Request, path: str):
+    """Route /api/* requests: handle chat locally, forward the rest.
+
+    Args:
+        request: Incoming request.
+        path: Portion of the URL after ``/api/``.
+
+    Returns:
+        For ``chat``: the result of ``handle_chat`` when the JSON body's
+        ``stream`` flag is truthy or absent, otherwise the result of
+        ``handle_chat_non_streaming``. For any other path: a
+        StreamingResponse relaying the status, headers and body returned by
+        ``forward_to_ollama``.
+    """
+    if path == "chat":
+        body = await request.json()
+        # A missing "stream" key is treated as a streaming request.
+        if body.get("stream", True):
+            return await handle_chat(request)
+        return await handle_chat_non_streaming(body)
+
+    resp = await forward_to_ollama(request, f"/api/{path}")
+    # The body is re-streamed by this server (and aiter_bytes() yields
+    # decoded bytes, assuming resp is an httpx response), so the upstream
+    # length/encoding and hop-by-hop headers no longer describe what is
+    # sent and must not be copied through.
+    dropped = {
+        "content-length",
+        "content-encoding",
+        "transfer-encoding",
+        "connection",
+        "keep-alive",
+    }
+    headers = {
+        key: value
+        for key, value in resp.headers.items()
+        if key.lower() not in dropped
+    }
+    return StreamingResponse(
+        resp.aiter_bytes(),
+        status_code=resp.status_code,
+        headers=headers,
+        media_type=resp.headers.get("content-type")
+    )
+
+
+@app.post("/curator/run")
+async def trigger_curator(full: bool = False):
+    """Run a curation pass on demand and report when it has finished.
+
+    Args:
+        full: When True, await the full curation job (all raw memories).
+              When False (default), await the daily job (recent 24h).
+
+    Returns:
+        A dict with a ``status`` message naming the job that was awaited.
+    """
+    if not full:
+        await run_curator()
+        return {"status": "daily curation completed"}
+
+    await run_curator_full()
+    return {"status": "full curation completed"}