Compare commits
6 Commits
9774875173
...
cbe12f0ebd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cbe12f0ebd | ||
|
|
9fa5d08ce0 | ||
|
|
90dd87edeb | ||
|
|
2801a63b11 | ||
|
|
355986a59f | ||
|
|
600f9deec1 |
@@ -1,5 +1,5 @@
|
||||
# app/config.py
|
||||
import toml
|
||||
import tomllib
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
@@ -83,8 +83,8 @@ class Config:
|
||||
config = cls()
|
||||
|
||||
if config_path.exists():
|
||||
with open(config_path, "r") as f:
|
||||
data = toml.load(f)
|
||||
with open(config_path, "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
|
||||
if "general" in data:
|
||||
config.ollama_host = data["general"].get("ollama_host", config.ollama_host)
|
||||
|
||||
@@ -6,7 +6,7 @@ The prompt determines behavior based on current date.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
import httpx
|
||||
@@ -49,7 +49,7 @@ class Curator:
|
||||
Otherwise runs daily mode (processes recent 24h only).
|
||||
The prompt determines behavior based on current date.
|
||||
"""
|
||||
current_date = datetime.utcnow()
|
||||
current_date = datetime.now(timezone.utc)
|
||||
is_monthly = current_date.day == 1
|
||||
mode = "MONTHLY" if is_monthly else "DAILY"
|
||||
|
||||
@@ -169,7 +169,7 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
|
||||
return True
|
||||
try:
|
||||
mem_time = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
|
||||
cutoff = datetime.utcnow() - timedelta(hours=hours)
|
||||
cutoff = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=hours)
|
||||
return mem_time.replace(tzinfo=None) > cutoff
|
||||
except (ValueError, TypeError):
|
||||
logger.debug(f"Could not parse timestamp: {timestamp}")
|
||||
|
||||
@@ -4,7 +4,7 @@ from fastapi.responses import StreamingResponse, JSONResponse
|
||||
from contextlib import asynccontextmanager
|
||||
import httpx
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from .config import config
|
||||
from .singleton import get_qdrant_service
|
||||
@@ -96,7 +96,7 @@ async def api_tags():
|
||||
for name in config.cloud.models.keys():
|
||||
data["models"].append({
|
||||
"name": name,
|
||||
"modified_at": "2026-03-25T00:00:00Z",
|
||||
"modified_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
|
||||
"size": 0,
|
||||
"digest": "cloud",
|
||||
"details": {"family": "cloud"}
|
||||
|
||||
@@ -48,17 +48,17 @@ def debug_log(category: str, message: str, data: dict = None):
|
||||
if not config.debug:
|
||||
return
|
||||
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Create logs directory
|
||||
log_dir = DEBUG_LOG_DIR
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
today = datetime.utcnow().strftime("%Y-%m-%d")
|
||||
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
log_path = log_dir / f"debug_{today}.log"
|
||||
|
||||
entry = {
|
||||
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||
"timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
|
||||
"category": category,
|
||||
"message": message
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from qdrant_client import AsyncQdrantClient
|
||||
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
import uuid
|
||||
import logging
|
||||
import httpx
|
||||
@@ -54,7 +54,7 @@ class QdrantService:
|
||||
point_id = str(uuid.uuid4())
|
||||
embedding = await self.get_embedding(content)
|
||||
|
||||
timestamp = datetime.utcnow().isoformat() + "Z"
|
||||
timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
text = content
|
||||
if role == "user":
|
||||
text = f"User: {content}"
|
||||
@@ -85,7 +85,7 @@ class QdrantService:
|
||||
"""Store a complete Q&A turn as one document."""
|
||||
await self._ensure_collection()
|
||||
|
||||
timestamp = datetime.utcnow().isoformat() + "Z"
|
||||
timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
text = f"User: {user_question}\nAssistant: {assistant_answer}\nTimestamp: {timestamp}"
|
||||
|
||||
point_id = str(uuid.uuid4())
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
"""Global singleton instances for Vera-AI."""
|
||||
from typing import Optional
|
||||
from .qdrant_service import QdrantService
|
||||
from .config import config
|
||||
|
||||
_qdrant_service: QdrantService = None
|
||||
_qdrant_service: Optional[QdrantService] = None
|
||||
|
||||
|
||||
def get_qdrant_service() -> QdrantService:
|
||||
|
||||
49
app/utils.py
49
app/utils.py
@@ -1,9 +1,10 @@
|
||||
"""Utility functions for vera-ai."""
|
||||
from .config import config
|
||||
from .singleton import get_qdrant_service
|
||||
import tiktoken
|
||||
import os
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Use cl100k_base encoding (GPT-4 compatible)
|
||||
@@ -13,24 +14,6 @@ ENCODING = tiktoken.get_encoding("cl100k_base")
|
||||
PROMPTS_DIR = Path(os.environ.get("VERA_PROMPTS_DIR", "/app/prompts"))
|
||||
STATIC_DIR = Path(os.environ.get("VERA_STATIC_DIR", "/app/static"))
|
||||
|
||||
# Global qdrant_service instance for utils
|
||||
_qdrant_service = None
|
||||
|
||||
def get_qdrant_service():
|
||||
"""Get or create the QdrantService singleton."""
|
||||
global _qdrant_service
|
||||
if _qdrant_service is None:
|
||||
from .config import config
|
||||
from .qdrant_service import QdrantService
|
||||
_qdrant_service = QdrantService(
|
||||
host=config.qdrant_host,
|
||||
collection=config.qdrant_collection,
|
||||
embedding_model=config.embedding_model,
|
||||
vector_size=config.vector_size,
|
||||
ollama_host=config.ollama_host
|
||||
)
|
||||
return _qdrant_service
|
||||
|
||||
def count_tokens(text: str) -> int:
|
||||
"""Count tokens in text."""
|
||||
if not text:
|
||||
@@ -56,7 +39,7 @@ def truncate_by_tokens(text: str, max_tokens: int) -> str:
|
||||
|
||||
def filter_memories_by_time(memories: List[Dict], hours: int = 24) -> List[Dict]:
|
||||
"""Filter memories from the last N hours."""
|
||||
cutoff = datetime.utcnow() - timedelta(hours=hours)
|
||||
cutoff = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=hours)
|
||||
filtered = []
|
||||
for mem in memories:
|
||||
ts = mem.get("timestamp")
|
||||
@@ -64,7 +47,7 @@ def filter_memories_by_time(memories: List[Dict], hours: int = 24) -> List[Dict]
|
||||
try:
|
||||
# Parse ISO timestamp
|
||||
if isinstance(ts, str):
|
||||
mem_time = datetime.fromisoformat(ts.replace("Z", "+00:00").replace("+00:00", ""))
|
||||
mem_time = datetime.fromisoformat(ts.replace("Z", "")).replace(tzinfo=None)
|
||||
else:
|
||||
mem_time = ts
|
||||
if mem_time > cutoff:
|
||||
@@ -100,15 +83,6 @@ def merge_memories(memories: List[Dict]) -> Dict:
|
||||
"ids": ids
|
||||
}
|
||||
|
||||
def calculate_token_budget(total_budget: int, system_ratio: float = 0.2,
|
||||
semantic_ratio: float = 0.5, context_ratio: float = 0.3) -> Dict[int, int]:
|
||||
"""Calculate token budgets for each layer."""
|
||||
return {
|
||||
"system": int(total_budget * system_ratio),
|
||||
"semantic": int(total_budget * semantic_ratio),
|
||||
"context": int(total_budget * context_ratio)
|
||||
}
|
||||
|
||||
def load_system_prompt() -> str:
|
||||
"""Load system prompt from prompts directory."""
|
||||
import logging
|
||||
@@ -219,14 +193,21 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
|
||||
}
|
||||
|
||||
# === LAYER 1: System Prompt ===
|
||||
system_content = ""
|
||||
# Caller's system message passes through; systemprompt.md appends if non-empty.
|
||||
caller_system = ""
|
||||
for msg in incoming_messages:
|
||||
if msg.get("role") == "system":
|
||||
system_content = msg.get("content", "")
|
||||
caller_system = msg.get("content", "")
|
||||
break
|
||||
|
||||
if system_prompt:
|
||||
system_content += "\n\n" + system_prompt
|
||||
if caller_system and system_prompt:
|
||||
system_content = caller_system + "\n\n" + system_prompt
|
||||
elif caller_system:
|
||||
system_content = caller_system
|
||||
elif system_prompt:
|
||||
system_content = system_prompt
|
||||
else:
|
||||
system_content = ""
|
||||
|
||||
if system_content:
|
||||
messages.append({"role": "system", "content": system_content})
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
fastapi>=0.104.0
|
||||
uvicorn[standard]>=0.24.0
|
||||
httpx>=0.25.0
|
||||
qdrant-client>=1.6.0
|
||||
ollama>=0.1.0
|
||||
toml>=0.10.2
|
||||
tiktoken>=0.5.0
|
||||
apscheduler>=3.10.0
|
||||
pytest>=7.0.0
|
||||
pytest-asyncio>=0.21.0
|
||||
pytest-cov>=4.0.0
|
||||
fastapi==0.135.2
|
||||
uvicorn[standard]==0.42.0
|
||||
httpx==0.28.1
|
||||
qdrant-client==1.17.1
|
||||
ollama==0.6.1
|
||||
tiktoken==0.12.0
|
||||
apscheduler==3.11.2
|
||||
portalocker==3.2.0
|
||||
pytest==9.0.2
|
||||
pytest-asyncio==1.3.0
|
||||
pytest-cov==7.1.0
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
import pytest
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
@@ -77,14 +77,14 @@ class TestIsRecent:
|
||||
def test_memory_within_window(self):
|
||||
"""Memory timestamped 1 hour ago is recent (within 24h)."""
|
||||
curator, _ = make_curator()
|
||||
ts = (datetime.utcnow() - timedelta(hours=1)).isoformat() + "Z"
|
||||
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat() + "Z"
|
||||
memory = {"timestamp": ts}
|
||||
assert curator._is_recent(memory, hours=24) is True
|
||||
|
||||
def test_memory_outside_window(self):
|
||||
"""Memory timestamped 48 hours ago is not recent."""
|
||||
curator, _ = make_curator()
|
||||
ts = (datetime.utcnow() - timedelta(hours=48)).isoformat() + "Z"
|
||||
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat() + "Z"
|
||||
memory = {"timestamp": ts}
|
||||
assert curator._is_recent(memory, hours=24) is False
|
||||
|
||||
@@ -109,7 +109,7 @@ class TestIsRecent:
|
||||
def test_boundary_edge_just_inside(self):
|
||||
"""Memory at exactly hours-1 minutes ago should be recent."""
|
||||
curator, _ = make_curator()
|
||||
ts = (datetime.utcnow() - timedelta(hours=23, minutes=59)).isoformat() + "Z"
|
||||
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=23, minutes=59)).isoformat() + "Z"
|
||||
memory = {"timestamp": ts}
|
||||
assert curator._is_recent(memory, hours=24) is True
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Tests for utility functions."""
|
||||
import pytest
|
||||
from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn, build_augmented_messages
|
||||
|
||||
|
||||
class TestCountTokens:
|
||||
@@ -90,20 +91,20 @@ class TestFilterMemoriesByTime:
|
||||
|
||||
def test_includes_recent_memory(self):
|
||||
"""Memory with timestamp in the last 24h should be included."""
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from app.utils import filter_memories_by_time
|
||||
|
||||
ts = (datetime.utcnow() - timedelta(hours=1)).isoformat()
|
||||
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat()
|
||||
memories = [{"timestamp": ts, "text": "recent"}]
|
||||
result = filter_memories_by_time(memories, hours=24)
|
||||
assert len(result) == 1
|
||||
|
||||
def test_excludes_old_memory(self):
|
||||
"""Memory older than cutoff should be excluded."""
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from app.utils import filter_memories_by_time
|
||||
|
||||
ts = (datetime.utcnow() - timedelta(hours=48)).isoformat()
|
||||
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat()
|
||||
memories = [{"timestamp": ts, "text": "old"}]
|
||||
result = filter_memories_by_time(memories, hours=24)
|
||||
assert len(result) == 0
|
||||
@@ -124,6 +125,16 @@ class TestFilterMemoriesByTime:
|
||||
result = filter_memories_by_time(memories, hours=24)
|
||||
assert len(result) == 1
|
||||
|
||||
def test_z_suffix_old_timestamp_excluded(self):
|
||||
"""Regression: chained .replace() was not properly handling Z suffix on old timestamps."""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from app.utils import filter_memories_by_time
|
||||
|
||||
old_ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat() + "Z"
|
||||
memories = [{"timestamp": old_ts, "text": "old with Z"}]
|
||||
result = filter_memories_by_time(memories, hours=24)
|
||||
assert len(result) == 0, f"Old Z-suffixed timestamp should be excluded but wasn't: {old_ts}"
|
||||
|
||||
def test_empty_list(self):
|
||||
"""Empty input returns empty list."""
|
||||
from app.utils import filter_memories_by_time
|
||||
@@ -132,10 +143,10 @@ class TestFilterMemoriesByTime:
|
||||
|
||||
def test_z_suffix_timestamp(self):
|
||||
"""ISO timestamp with Z suffix should be handled correctly."""
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from app.utils import filter_memories_by_time
|
||||
|
||||
ts = (datetime.utcnow() - timedelta(hours=1)).isoformat() + "Z"
|
||||
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat() + "Z"
|
||||
memories = [{"timestamp": ts, "text": "recent with Z"}]
|
||||
result = filter_memories_by_time(memories, hours=24)
|
||||
assert len(result) == 1
|
||||
@@ -190,37 +201,6 @@ class TestMergeMemories:
|
||||
assert len(result["ids"]) == 2
|
||||
|
||||
|
||||
class TestCalculateTokenBudget:
|
||||
"""Tests for calculate_token_budget function."""
|
||||
|
||||
def test_default_ratios_sum(self):
|
||||
"""Default ratios should sum to 1.0 (system+semantic+context)."""
|
||||
from app.utils import calculate_token_budget
|
||||
|
||||
result = calculate_token_budget(1000)
|
||||
assert result["system"] + result["semantic"] + result["context"] == 1000
|
||||
|
||||
def test_custom_ratios(self):
|
||||
"""Custom ratios should produce correct proportional budgets."""
|
||||
from app.utils import calculate_token_budget
|
||||
|
||||
result = calculate_token_budget(
|
||||
100, system_ratio=0.1, semantic_ratio=0.6, context_ratio=0.3
|
||||
)
|
||||
assert result["system"] == 10
|
||||
assert result["semantic"] == 60
|
||||
assert result["context"] == 30
|
||||
|
||||
def test_zero_budget(self):
|
||||
"""Zero total budget yields all zeros."""
|
||||
from app.utils import calculate_token_budget
|
||||
|
||||
result = calculate_token_budget(0)
|
||||
assert result["system"] == 0
|
||||
assert result["semantic"] == 0
|
||||
assert result["context"] == 0
|
||||
|
||||
|
||||
class TestBuildAugmentedMessages:
|
||||
"""Tests for build_augmented_messages function (mocked I/O)."""
|
||||
|
||||
@@ -317,3 +297,71 @@ class TestBuildAugmentedMessages:
|
||||
|
||||
contents = [m["content"] for m in result]
|
||||
assert any("Old question" in c or "Old answer" in c for c in contents)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_system_prompt_appends_to_caller_system(self):
|
||||
"""systemprompt.md content appends to caller's system message."""
|
||||
import app.utils as utils_module
|
||||
|
||||
mock_qdrant = self._make_qdrant_mock()
|
||||
|
||||
with patch.object(utils_module, "load_system_prompt", return_value="Vera memory context"), \
|
||||
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
|
||||
incoming = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hello"}
|
||||
]
|
||||
result = await build_augmented_messages(incoming)
|
||||
|
||||
system_msg = result[0]
|
||||
assert system_msg["role"] == "system"
|
||||
assert system_msg["content"] == "You are a helpful assistant.\n\nVera memory context"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_system_prompt_passthrough(self):
|
||||
"""When systemprompt.md is empty, only caller's system message passes through."""
|
||||
import app.utils as utils_module
|
||||
|
||||
mock_qdrant = self._make_qdrant_mock()
|
||||
|
||||
with patch.object(utils_module, "load_system_prompt", return_value=""), \
|
||||
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
|
||||
incoming = [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hello"}
|
||||
]
|
||||
result = await build_augmented_messages(incoming)
|
||||
|
||||
system_msg = result[0]
|
||||
assert system_msg["content"] == "You are a helpful assistant."
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_caller_system_with_vera_prompt(self):
|
||||
"""When caller sends no system message but systemprompt.md exists, use vera prompt."""
|
||||
import app.utils as utils_module
|
||||
|
||||
mock_qdrant = self._make_qdrant_mock()
|
||||
|
||||
with patch.object(utils_module, "load_system_prompt", return_value="Vera memory context"), \
|
||||
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
|
||||
incoming = [{"role": "user", "content": "Hello"}]
|
||||
result = await build_augmented_messages(incoming)
|
||||
|
||||
system_msg = result[0]
|
||||
assert system_msg["role"] == "system"
|
||||
assert system_msg["content"] == "Vera memory context"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_system_anywhere(self):
|
||||
"""When neither caller nor systemprompt.md provides system content, no system message."""
|
||||
import app.utils as utils_module
|
||||
|
||||
mock_qdrant = self._make_qdrant_mock()
|
||||
|
||||
with patch.object(utils_module, "load_system_prompt", return_value=""), \
|
||||
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
|
||||
incoming = [{"role": "user", "content": "Hello"}]
|
||||
result = await build_augmented_messages(incoming)
|
||||
|
||||
# First message should be user, not system
|
||||
assert result[0]["role"] == "user"
|
||||
Reference in New Issue
Block a user