Initial commit: Vera-AI v2 with async Qdrant, singleton pattern, monthly curation, and configurable UID/GID/TZ
Features: - AsyncQdrantClient for non-blocking Qdrant operations - Singleton pattern for QdrantService - Monthly full curation (day 1 at 03:00) - Configurable UID/GID for Docker - Timezone support via TZ env var - Configurable log directory (VERA_LOG_DIR) - Volume mounts for config/, prompts/, logs/ - Standard Docker format with .env file Fixes: - Removed unused system_token_budget - Added semantic_score_threshold config - Fixed streaming response handling - Python-based healthcheck (no curl dependency)
This commit is contained in:
121
app/config.py
Normal file
121
app/config.py
Normal file
@@ -0,0 +1,121 @@
|
||||
# app/config.py
|
||||
import toml
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Optional
|
||||
|
||||
# Vector dimensions for each supported embedding model.  Keys are matched
# by substring against the configured model name (see Config.vector_size),
# so tagged variants such as "nomic-embed-text:latest" resolve as well.
EMBEDDING_DIMS = {
    "nomic-embed-text": 768,
    "snowflake-arctic-embed2": 1024,
    "mxbai-embed-large": 1024,
}

# Base directories for configuration, prompt templates, and static assets.
# Each one can be overridden through an environment variable so the Docker
# volume layout stays configurable.
CONFIG_DIR = Path(os.getenv("VERA_CONFIG_DIR", "/app/config"))
PROMPTS_DIR = Path(os.getenv("VERA_PROMPTS_DIR", "/app/prompts"))
STATIC_DIR = Path(os.getenv("VERA_STATIC_DIR", "/app/static"))
|
||||
@dataclass
class CloudConfig:
    """Settings for routing selected local model names to a cloud provider.

    The mapping in ``models`` goes from a local model name to the cloud
    provider's model ID; a model is cloud-routed exactly when it appears
    as a key in that mapping.
    """

    enabled: bool = False
    api_base: str = ""
    api_key_env: str = "OPENROUTER_API_KEY"
    models: Dict[str, str] = field(default_factory=dict)

    @property
    def api_key(self) -> Optional[str]:
        """API key read from the environment variable named by api_key_env."""
        return os.getenv(self.api_key_env)

    def get_cloud_model(self, local_name: str) -> Optional[str]:
        """Return the cloud model ID mapped to local_name, or None if unmapped."""
        return self.models.get(local_name)

    def is_cloud_model(self, local_name: str) -> bool:
        """Return True when local_name should be routed to the cloud provider."""
        return local_name in self.models
|
||||
|
||||
@dataclass
class Config:
    """Application configuration with defaults, loadable from a TOML file.

    Paths honor the VERA_* environment variables via the module-level
    CONFIG_DIR constant; a missing config file is not an error (the
    built-in defaults are used).
    """

    ollama_host: str = "http://10.0.0.10:11434"
    qdrant_host: str = "http://10.0.0.22:6333"
    qdrant_collection: str = "memories"
    embedding_model: str = "snowflake-arctic-embed2"
    # system_token_budget was removed on purpose: the system prompt is
    # never truncated.
    semantic_token_budget: int = 25000
    context_token_budget: int = 22000
    semantic_search_turns: int = 2
    semantic_score_threshold: float = 0.6  # Minimum score for semantic search hits
    run_time: str = "02:00"       # Daily curator run time (HH:MM)
    full_run_time: str = "03:00"  # Monthly full curator run time (HH:MM)
    full_run_day: int = 1         # Day of month for the full run (1st)
    curator_model: str = "gpt-oss:120b"
    debug: bool = False
    cloud: CloudConfig = field(default_factory=CloudConfig)

    @property
    def vector_size(self) -> int:
        """Vector dimensionality for the configured embedding model.

        Substring-matches the model name against EMBEDDING_DIMS so tagged
        variants (e.g. "nomic-embed-text:latest") still resolve; falls
        back to 1024 for unknown models.
        """
        for model_name, dims in EMBEDDING_DIMS.items():
            if model_name in self.embedding_model:
                return dims
        return 1024

    @classmethod
    def load(cls, config_path: Optional[str] = None) -> "Config":
        """Load configuration from a TOML file.

        Search order:
        1. Explicit ``config_path`` argument, when given.
        2. ``CONFIG_DIR / "config.toml"`` (CONFIG_DIR honors the
           VERA_CONFIG_DIR env var, defaulting to /app/config).
        3. ``config.toml`` in the app root (backward compatibility).

        Unknown sections/keys are ignored; missing keys keep their
        dataclass defaults. A nonexistent file returns pure defaults.
        """
        if config_path is None:
            # Prefer the configured directory, then fall back to the app
            # root for backward compatibility.
            path = CONFIG_DIR / "config.toml"
            if not path.exists():
                path = Path(__file__).parent.parent / "config.toml"
        else:
            path = Path(config_path)

        config = cls()

        if not path.exists():
            return config

        with open(path, "r") as f:
            data = toml.load(f)

        general = data.get("general", {})
        config.ollama_host = general.get("ollama_host", config.ollama_host)
        config.qdrant_host = general.get("qdrant_host", config.qdrant_host)
        config.qdrant_collection = general.get("qdrant_collection", config.qdrant_collection)
        config.embedding_model = general.get("embedding_model", config.embedding_model)
        config.debug = general.get("debug", config.debug)

        # Note: a [layers] system_token_budget key is intentionally ignored
        # (the system prompt is never truncated).
        layers = data.get("layers", {})
        config.semantic_token_budget = layers.get("semantic_token_budget", config.semantic_token_budget)
        config.context_token_budget = layers.get("context_token_budget", config.context_token_budget)
        config.semantic_search_turns = layers.get("semantic_search_turns", config.semantic_search_turns)
        config.semantic_score_threshold = layers.get("semantic_score_threshold", config.semantic_score_threshold)

        curator = data.get("curator", {})
        config.run_time = curator.get("run_time", config.run_time)
        config.full_run_time = curator.get("full_run_time", config.full_run_time)
        config.full_run_day = curator.get("full_run_day", config.full_run_day)
        config.curator_model = curator.get("curator_model", config.curator_model)

        if "cloud" in data:
            cloud_data = data["cloud"]
            config.cloud = CloudConfig(
                enabled=cloud_data.get("enabled", False),
                api_base=cloud_data.get("api_base", ""),
                api_key_env=cloud_data.get("api_key_env", "OPENROUTER_API_KEY"),
                models=cloud_data.get("models", {}),
            )

        return config
|
||||
|
||||
# Module-level singleton: loaded once at import time so every consumer
# shares the same Config instance.  Note this performs file I/O on import.
config = Config.load()
|
||||
Reference in New Issue
Block a user