Features: - AsyncQdrantClient for non-blocking Qdrant operations - Singleton pattern for QdrantService - Monthly full curation (day 1 at 03:00) - Configurable UID/GID for Docker - Timezone support via TZ env var - Configurable log directory (VERA_LOG_DIR) - Volume mounts for config/, prompts/, logs/ - Standard Docker format with .env file Fixes: - Removed unused system_token_budget - Added semantic_score_threshold config - Fixed streaming response handling - Python-based healthcheck (no curl dependency)
121 lines · 5.0 KiB · Python
# app/config.py
|
|
import toml
|
|
import os
|
|
from pathlib import Path
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, Optional
|
|
|
|
# Embedding model dimensions.
# Maps an embedding model name to the dimensionality of the vectors it
# produces. Lookups elsewhere match by substring, so tagged variants of a
# configured model name (e.g. a ":latest" suffix) still resolve here.
EMBEDDING_DIMS = {
    "nomic-embed-text": 768,
    "snowflake-arctic-embed2": 1024,
    "mxbai-embed-large": 1024,
}
|
|
|
|
# Configurable paths (can be overridden via environment)
def _env_path(var_name: str, default: str) -> Path:
    """Return a Path taken from the named environment variable, or *default*."""
    return Path(os.environ.get(var_name, default))


CONFIG_DIR = _env_path("VERA_CONFIG_DIR", "/app/config")
PROMPTS_DIR = _env_path("VERA_PROMPTS_DIR", "/app/prompts")
STATIC_DIR = _env_path("VERA_STATIC_DIR", "/app/static")
|
|
|
|
@dataclass
class CloudConfig:
    """Settings for routing selected local model names to a cloud provider.

    The API key itself is never stored here; only the name of the
    environment variable that holds it, resolved lazily on access.
    """

    enabled: bool = False
    api_base: str = ""
    api_key_env: str = "OPENROUTER_API_KEY"
    models: Dict[str, str] = field(default_factory=dict)

    @property
    def api_key(self) -> Optional[str]:
        """The API key read from the configured environment variable, or None."""
        return os.environ.get(self.api_key_env)

    def get_cloud_model(self, local_name: str) -> Optional[str]:
        """Get cloud model ID for a local model name."""
        try:
            return self.models[local_name]
        except KeyError:
            return None

    def is_cloud_model(self, local_name: str) -> bool:
        """Check if a model name should be routed to cloud."""
        return local_name in self.models
|
|
|
|
@dataclass
class Config:
    """Application configuration with optional TOML-file overrides.

    The defaults below apply when no config.toml is found; ``Config.load``
    selectively overrides them from the ``[general]``, ``[layers]``,
    ``[curator]`` and ``[cloud]`` sections.
    """

    ollama_host: str = "http://10.0.0.10:11434"
    qdrant_host: str = "http://10.0.0.22:6333"
    qdrant_collection: str = "memories"
    embedding_model: str = "snowflake-arctic-embed2"
    # Removed system_token_budget - system prompt is never truncated
    semantic_token_budget: int = 25000
    context_token_budget: int = 22000
    semantic_search_turns: int = 2
    semantic_score_threshold: float = 0.6  # Score threshold for semantic search
    run_time: str = "02:00"  # Daily curator time
    full_run_time: str = "03:00"  # Monthly full curator time
    full_run_day: int = 1  # Day of month for full run (1st)
    curator_model: str = "gpt-oss:120b"
    debug: bool = False
    cloud: CloudConfig = field(default_factory=CloudConfig)

    @property
    def vector_size(self) -> int:
        """Get vector size based on embedding model.

        Matches by substring so tagged names (e.g. "nomic-embed-text:latest")
        still resolve; falls back to 1024 for unknown models.
        """
        for model_name, dims in EMBEDDING_DIMS.items():
            if model_name in self.embedding_model:
                return dims
        return 1024

    @classmethod
    def load(cls, config_path: Optional[str] = None) -> "Config":
        """Load config from TOML file.

        Search order:
        1. Explicit config_path argument
        2. VERA_CONFIG_DIR/config.toml
        3. /app/config/config.toml
        4. config.toml in app root (backward compatibility)

        A missing file is not an error: the defaults are returned unchanged.
        """
        if config_path is None:
            # Try config directory first
            path = CONFIG_DIR / "config.toml"
            if not path.exists():
                # Fall back to app root (backward compatibility)
                path = Path(__file__).parent.parent / "config.toml"
        else:
            path = Path(config_path)

        config = cls()

        # No file found anywhere: run on defaults.
        if not path.exists():
            return config

        with open(path, "r") as f:
            data = toml.load(f)

        if "general" in data:
            general = data["general"]
            config.ollama_host = general.get("ollama_host", config.ollama_host)
            config.qdrant_host = general.get("qdrant_host", config.qdrant_host)
            config.qdrant_collection = general.get("qdrant_collection", config.qdrant_collection)
            config.embedding_model = general.get("embedding_model", config.embedding_model)
            config.debug = general.get("debug", config.debug)

        if "layers" in data:
            layers = data["layers"]
            # Note: system_token_budget is ignored (system prompt never truncated)
            config.semantic_token_budget = layers.get("semantic_token_budget", config.semantic_token_budget)
            config.context_token_budget = layers.get("context_token_budget", config.context_token_budget)
            config.semantic_search_turns = layers.get("semantic_search_turns", config.semantic_search_turns)
            config.semantic_score_threshold = layers.get("semantic_score_threshold", config.semantic_score_threshold)

        if "curator" in data:
            curator = data["curator"]
            config.run_time = curator.get("run_time", config.run_time)
            config.full_run_time = curator.get("full_run_time", config.full_run_time)
            config.full_run_day = curator.get("full_run_day", config.full_run_day)
            config.curator_model = curator.get("curator_model", config.curator_model)

        if "cloud" in data:
            cloud_data = data["cloud"]
            config.cloud = CloudConfig(
                enabled=cloud_data.get("enabled", False),
                api_base=cloud_data.get("api_base", ""),
                api_key_env=cloud_data.get("api_key_env", "OPENROUTER_API_KEY"),
                models=cloud_data.get("models", {})
            )

        return config
|
|
|
|
# Module-level singleton: loaded once at import time and shared by importers.
config = Config.load()