Compare commits

...

6 Commits

Author SHA1 Message Date
Claude Code
cbe12f0ebd chore: remove dead calculate_token_budget, fix hardcoded timestamp
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 16:13:40 -05:00
Claude Code
9fa5d08ce0 refactor: consolidate duplicate QdrantService singleton into singleton.py
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 16:12:21 -05:00
Claude Code
90dd87edeb fix: system prompt appends to caller's system message, empty = passthrough
Handle all 4 combinations of caller system message and systemprompt.md
correctly: append when both exist, passthrough when only one exists,
omit when neither exists. Fixes leading \n\n when no caller system msg.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 16:10:24 -05:00
Claude Code
2801a63b11 fix: correct timestamp parsing bug - chained .replace() was stripping timezone
The chained .replace("Z", "+00:00").replace("+00:00", "") calls were
undoing each other, causing Z-suffixed timestamps to lose timezone info.
Now strips "Z" directly and ensures naive datetime for cutoff comparison.
Added regression test for old Z-suffixed timestamps.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 16:07:34 -05:00
Claude Code
355986a59f fix: replace deprecated datetime.utcnow() with timezone-aware alternative
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 16:06:00 -05:00
Claude Code
600f9deec1 chore: pin deps to production versions, replace toml with tomllib
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 16:03:16 -05:00
10 changed files with 135 additions and 105 deletions

View File

@@ -1,5 +1,5 @@
# app/config.py
import toml
import tomllib
import os
from pathlib import Path
from dataclasses import dataclass, field
@@ -83,8 +83,8 @@ class Config:
config = cls()
if config_path.exists():
with open(config_path, "r") as f:
data = toml.load(f)
with open(config_path, "rb") as f:
data = tomllib.load(f)
if "general" in data:
config.ollama_host = data["general"].get("ollama_host", config.ollama_host)

View File

@@ -6,7 +6,7 @@ The prompt determines behavior based on current date.
"""
import logging
import os
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import List, Dict, Any, Optional
from pathlib import Path
import httpx
@@ -49,7 +49,7 @@ class Curator:
Otherwise runs daily mode (processes recent 24h only).
The prompt determines behavior based on current date.
"""
current_date = datetime.utcnow()
current_date = datetime.now(timezone.utc)
is_monthly = current_date.day == 1
mode = "MONTHLY" if is_monthly else "DAILY"
@@ -169,7 +169,7 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
return True
try:
mem_time = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
cutoff = datetime.utcnow() - timedelta(hours=hours)
cutoff = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=hours)
return mem_time.replace(tzinfo=None) > cutoff
except (ValueError, TypeError):
logger.debug(f"Could not parse timestamp: {timestamp}")

View File

@@ -4,7 +4,7 @@ from fastapi.responses import StreamingResponse, JSONResponse
from contextlib import asynccontextmanager
import httpx
import logging
from datetime import datetime
from datetime import datetime, timezone
from .config import config
from .singleton import get_qdrant_service
@@ -96,7 +96,7 @@ async def api_tags():
for name in config.cloud.models.keys():
data["models"].append({
"name": name,
"modified_at": "2026-03-25T00:00:00Z",
"modified_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
"size": 0,
"digest": "cloud",
"details": {"family": "cloud"}

View File

@@ -48,17 +48,17 @@ def debug_log(category: str, message: str, data: dict = None):
if not config.debug:
return
from datetime import datetime
from datetime import datetime, timezone
# Create logs directory
log_dir = DEBUG_LOG_DIR
log_dir.mkdir(parents=True, exist_ok=True)
today = datetime.utcnow().strftime("%Y-%m-%d")
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
log_path = log_dir / f"debug_{today}.log"
entry = {
"timestamp": datetime.utcnow().isoformat() + "Z",
"timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
"category": category,
"message": message
}

View File

@@ -2,7 +2,7 @@
from qdrant_client import AsyncQdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
from typing import List, Dict, Any, Optional
from datetime import datetime
from datetime import datetime, timezone
import uuid
import logging
import httpx
@@ -54,7 +54,7 @@ class QdrantService:
point_id = str(uuid.uuid4())
embedding = await self.get_embedding(content)
timestamp = datetime.utcnow().isoformat() + "Z"
timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
text = content
if role == "user":
text = f"User: {content}"
@@ -85,7 +85,7 @@ class QdrantService:
"""Store a complete Q&A turn as one document."""
await self._ensure_collection()
timestamp = datetime.utcnow().isoformat() + "Z"
timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
text = f"User: {user_question}\nAssistant: {assistant_answer}\nTimestamp: {timestamp}"
point_id = str(uuid.uuid4())

View File

@@ -1,8 +1,9 @@
"""Global singleton instances for Vera-AI."""
from typing import Optional
from .qdrant_service import QdrantService
from .config import config
_qdrant_service: QdrantService = None
_qdrant_service: Optional[QdrantService] = None
def get_qdrant_service() -> QdrantService:

View File

@@ -1,9 +1,10 @@
"""Utility functions for vera-ai."""
from .config import config
from .singleton import get_qdrant_service
import tiktoken
import os
from typing import List, Dict, Optional
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from pathlib import Path
# Use cl100k_base encoding (GPT-4 compatible)
@@ -13,24 +14,6 @@ ENCODING = tiktoken.get_encoding("cl100k_base")
PROMPTS_DIR = Path(os.environ.get("VERA_PROMPTS_DIR", "/app/prompts"))
STATIC_DIR = Path(os.environ.get("VERA_STATIC_DIR", "/app/static"))
# Global qdrant_service instance for utils
_qdrant_service = None
def get_qdrant_service():
"""Get or create the QdrantService singleton."""
global _qdrant_service
if _qdrant_service is None:
from .config import config
from .qdrant_service import QdrantService
_qdrant_service = QdrantService(
host=config.qdrant_host,
collection=config.qdrant_collection,
embedding_model=config.embedding_model,
vector_size=config.vector_size,
ollama_host=config.ollama_host
)
return _qdrant_service
def count_tokens(text: str) -> int:
"""Count tokens in text."""
if not text:
@@ -56,7 +39,7 @@ def truncate_by_tokens(text: str, max_tokens: int) -> str:
def filter_memories_by_time(memories: List[Dict], hours: int = 24) -> List[Dict]:
"""Filter memories from the last N hours."""
cutoff = datetime.utcnow() - timedelta(hours=hours)
cutoff = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=hours)
filtered = []
for mem in memories:
ts = mem.get("timestamp")
@@ -64,7 +47,7 @@ def filter_memories_by_time(memories: List[Dict], hours: int = 24) -> List[Dict]
try:
# Parse ISO timestamp
if isinstance(ts, str):
mem_time = datetime.fromisoformat(ts.replace("Z", "+00:00").replace("+00:00", ""))
mem_time = datetime.fromisoformat(ts.replace("Z", "")).replace(tzinfo=None)
else:
mem_time = ts
if mem_time > cutoff:
@@ -100,15 +83,6 @@ def merge_memories(memories: List[Dict]) -> Dict:
"ids": ids
}
def calculate_token_budget(total_budget: int, system_ratio: float = 0.2,
semantic_ratio: float = 0.5, context_ratio: float = 0.3) -> Dict[int, int]:
"""Calculate token budgets for each layer."""
return {
"system": int(total_budget * system_ratio),
"semantic": int(total_budget * semantic_ratio),
"context": int(total_budget * context_ratio)
}
def load_system_prompt() -> str:
"""Load system prompt from prompts directory."""
import logging
@@ -219,15 +193,22 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
}
# === LAYER 1: System Prompt ===
system_content = ""
# Caller's system message passes through; systemprompt.md appends if non-empty.
caller_system = ""
for msg in incoming_messages:
if msg.get("role") == "system":
system_content = msg.get("content", "")
caller_system = msg.get("content", "")
break
if system_prompt:
system_content += "\n\n" + system_prompt
if caller_system and system_prompt:
system_content = caller_system + "\n\n" + system_prompt
elif caller_system:
system_content = caller_system
elif system_prompt:
system_content = system_prompt
else:
system_content = ""
if system_content:
messages.append({"role": "system", "content": system_content})
logger.info(f"Layer 1 (system): {count_tokens(system_content)} tokens")

View File

@@ -1,11 +1,11 @@
fastapi>=0.104.0
uvicorn[standard]>=0.24.0
httpx>=0.25.0
qdrant-client>=1.6.0
ollama>=0.1.0
toml>=0.10.2
tiktoken>=0.5.0
apscheduler>=3.10.0
pytest>=7.0.0
pytest-asyncio>=0.21.0
pytest-cov>=4.0.0
fastapi==0.135.2
uvicorn[standard]==0.42.0
httpx==0.28.1
qdrant-client==1.17.1
ollama==0.6.1
tiktoken==0.12.0
apscheduler==3.11.2
portalocker==3.2.0
pytest==9.0.2
pytest-asyncio==1.3.0
pytest-cov==7.1.0

View File

@@ -2,7 +2,7 @@
import pytest
import json
import os
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
@@ -77,14 +77,14 @@ class TestIsRecent:
def test_memory_within_window(self):
"""Memory timestamped 1 hour ago is recent (within 24h)."""
curator, _ = make_curator()
ts = (datetime.utcnow() - timedelta(hours=1)).isoformat() + "Z"
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat() + "Z"
memory = {"timestamp": ts}
assert curator._is_recent(memory, hours=24) is True
def test_memory_outside_window(self):
"""Memory timestamped 48 hours ago is not recent."""
curator, _ = make_curator()
ts = (datetime.utcnow() - timedelta(hours=48)).isoformat() + "Z"
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat() + "Z"
memory = {"timestamp": ts}
assert curator._is_recent(memory, hours=24) is False
@@ -109,7 +109,7 @@ class TestIsRecent:
def test_boundary_edge_just_inside(self):
"""Memory timestamped 23h59m ago (one minute inside the 24h window) should be recent."""
curator, _ = make_curator()
ts = (datetime.utcnow() - timedelta(hours=23, minutes=59)).isoformat() + "Z"
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=23, minutes=59)).isoformat() + "Z"
memory = {"timestamp": ts}
assert curator._is_recent(memory, hours=24) is True

View File

@@ -1,6 +1,7 @@
"""Tests for utility functions."""
import pytest
from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn
from unittest.mock import AsyncMock, MagicMock, patch
from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn, build_augmented_messages
class TestCountTokens:
@@ -90,20 +91,20 @@ class TestFilterMemoriesByTime:
def test_includes_recent_memory(self):
"""Memory with timestamp in the last 24h should be included."""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from app.utils import filter_memories_by_time
ts = (datetime.utcnow() - timedelta(hours=1)).isoformat()
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat()
memories = [{"timestamp": ts, "text": "recent"}]
result = filter_memories_by_time(memories, hours=24)
assert len(result) == 1
def test_excludes_old_memory(self):
"""Memory older than cutoff should be excluded."""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from app.utils import filter_memories_by_time
ts = (datetime.utcnow() - timedelta(hours=48)).isoformat()
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat()
memories = [{"timestamp": ts, "text": "old"}]
result = filter_memories_by_time(memories, hours=24)
assert len(result) == 0
@@ -124,6 +125,16 @@ class TestFilterMemoriesByTime:
result = filter_memories_by_time(memories, hours=24)
assert len(result) == 1
def test_z_suffix_old_timestamp_excluded(self):
"""Regression: chained .replace() was not properly handling Z suffix on old timestamps."""
from datetime import datetime, timedelta, timezone
from app.utils import filter_memories_by_time
old_ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat() + "Z"
memories = [{"timestamp": old_ts, "text": "old with Z"}]
result = filter_memories_by_time(memories, hours=24)
assert len(result) == 0, f"Old Z-suffixed timestamp should be excluded but wasn't: {old_ts}"
def test_empty_list(self):
"""Empty input returns empty list."""
from app.utils import filter_memories_by_time
@@ -132,10 +143,10 @@ class TestFilterMemoriesByTime:
def test_z_suffix_timestamp(self):
"""ISO timestamp with Z suffix should be handled correctly."""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from app.utils import filter_memories_by_time
ts = (datetime.utcnow() - timedelta(hours=1)).isoformat() + "Z"
ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat() + "Z"
memories = [{"timestamp": ts, "text": "recent with Z"}]
result = filter_memories_by_time(memories, hours=24)
assert len(result) == 1
@@ -190,37 +201,6 @@ class TestMergeMemories:
assert len(result["ids"]) == 2
class TestCalculateTokenBudget:
"""Tests for calculate_token_budget function."""
def test_default_ratios_sum(self):
"""Default ratios should sum to 1.0 (system+semantic+context)."""
from app.utils import calculate_token_budget
result = calculate_token_budget(1000)
assert result["system"] + result["semantic"] + result["context"] == 1000
def test_custom_ratios(self):
"""Custom ratios should produce correct proportional budgets."""
from app.utils import calculate_token_budget
result = calculate_token_budget(
100, system_ratio=0.1, semantic_ratio=0.6, context_ratio=0.3
)
assert result["system"] == 10
assert result["semantic"] == 60
assert result["context"] == 30
def test_zero_budget(self):
"""Zero total budget yields all zeros."""
from app.utils import calculate_token_budget
result = calculate_token_budget(0)
assert result["system"] == 0
assert result["semantic"] == 0
assert result["context"] == 0
class TestBuildAugmentedMessages:
"""Tests for build_augmented_messages function (mocked I/O)."""
@@ -316,4 +296,72 @@ class TestBuildAugmentedMessages:
)
contents = [m["content"] for m in result]
assert any("Old question" in c or "Old answer" in c for c in contents)
assert any("Old question" in c or "Old answer" in c for c in contents)
@pytest.mark.asyncio
async def test_system_prompt_appends_to_caller_system(self):
"""systemprompt.md content appends to caller's system message."""
import app.utils as utils_module
mock_qdrant = self._make_qdrant_mock()
with patch.object(utils_module, "load_system_prompt", return_value="Vera memory context"), \
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
incoming = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello"}
]
result = await build_augmented_messages(incoming)
system_msg = result[0]
assert system_msg["role"] == "system"
assert system_msg["content"] == "You are a helpful assistant.\n\nVera memory context"
@pytest.mark.asyncio
async def test_empty_system_prompt_passthrough(self):
"""When systemprompt.md is empty, only caller's system message passes through."""
import app.utils as utils_module
mock_qdrant = self._make_qdrant_mock()
with patch.object(utils_module, "load_system_prompt", return_value=""), \
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
incoming = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello"}
]
result = await build_augmented_messages(incoming)
system_msg = result[0]
assert system_msg["content"] == "You are a helpful assistant."
@pytest.mark.asyncio
async def test_no_caller_system_with_vera_prompt(self):
"""When caller sends no system message but systemprompt.md exists, use vera prompt."""
import app.utils as utils_module
mock_qdrant = self._make_qdrant_mock()
with patch.object(utils_module, "load_system_prompt", return_value="Vera memory context"), \
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
incoming = [{"role": "user", "content": "Hello"}]
result = await build_augmented_messages(incoming)
system_msg = result[0]
assert system_msg["role"] == "system"
assert system_msg["content"] == "Vera memory context"
@pytest.mark.asyncio
async def test_no_system_anywhere(self):
"""When neither caller nor systemprompt.md provides system content, no system message."""
import app.utils as utils_module
mock_qdrant = self._make_qdrant_mock()
with patch.object(utils_module, "load_system_prompt", return_value=""), \
patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
incoming = [{"role": "user", "content": "Hello"}]
result = await build_augmented_messages(incoming)
# First message should be user, not system
assert result[0]["role"] == "user"