chore: remove dead calculate_token_budget, fix hardcoded timestamp

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
refactor: consolidate duplicate QdrantService singleton into singleton.py
2026-04-01 16:13:40 -05:00 · 2026-04-01 16:12:21 -05:00 · 2026-04-01 16:10:24 -05:00 · 2026-04-01 16:07:34 -05:00 · 2026-04-01 16:06:00 -05:00 · 2026-04-01 16:03:16 -05:00
10 changed files with 135 additions and 105 deletions
--- a/app/config.py
+++ b/app/config.py
@@ -1,5 +1,5 @@
 # app/config.py
-import toml
+import tomllib
 import os
 from pathlib import Path
 from dataclasses import dataclass, field
@@ -83,8 +83,8 @@ class Config:
        config = cls()
        
        if config_path.exists():
-            with open(config_path, "r") as f:
-                data = toml.load(f)
+            with open(config_path, "rb") as f:
+                data = tomllib.load(f)
            
            if "general" in data:
                config.ollama_host = data["general"].get("ollama_host", config.ollama_host)
--- a/app/curator.py
+++ b/app/curator.py
@@ -6,7 +6,7 @@ The prompt determines behavior based on current date.
 """
 import logging
 import os
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from typing import List, Dict, Any, Optional
 from pathlib import Path
 import httpx
@@ -49,7 +49,7 @@ class Curator:
        Otherwise runs daily mode (processes recent 24h only).
        The prompt determines behavior based on current date.
        """
-        current_date = datetime.utcnow()
+        current_date = datetime.now(timezone.utc)
        is_monthly = current_date.day == 1
        mode = "MONTHLY" if is_monthly else "DAILY"
        
@@ -169,7 +169,7 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
            return True
        try:
            mem_time = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
-            cutoff = datetime.utcnow() - timedelta(hours=hours)
+            cutoff = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=hours)
            return mem_time.replace(tzinfo=None) > cutoff
        except (ValueError, TypeError):
            logger.debug(f"Could not parse timestamp: {timestamp}")
--- a/app/main.py
+++ b/app/main.py
@@ -4,7 +4,7 @@ from fastapi.responses import StreamingResponse, JSONResponse
 from contextlib import asynccontextmanager
 import httpx
 import logging
-from datetime import datetime
+from datetime import datetime, timezone

 from .config import config
 from .singleton import get_qdrant_service
@@ -96,7 +96,7 @@ async def api_tags():
        for name in config.cloud.models.keys():
            data["models"].append({
                "name": name,
-                "modified_at": "2026-03-25T00:00:00Z",
+                "modified_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                "size": 0,
                "digest": "cloud",
                "details": {"family": "cloud"}
--- a/app/proxy_handler.py
+++ b/app/proxy_handler.py
@@ -48,17 +48,17 @@ def debug_log(category: str, message: str, data: dict = None):
    if not config.debug:
        return
    
-    from datetime import datetime
+    from datetime import datetime, timezone
    
    # Create logs directory
    log_dir = DEBUG_LOG_DIR
    log_dir.mkdir(parents=True, exist_ok=True)
    
-    today = datetime.utcnow().strftime("%Y-%m-%d")
+    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    log_path = log_dir / f"debug_{today}.log"
    
    entry = {
-        "timestamp": datetime.utcnow().isoformat() + "Z",
+        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "category": category,
        "message": message
    }
--- a/app/qdrant_service.py
+++ b/app/qdrant_service.py
@@ -2,7 +2,7 @@
 from qdrant_client import AsyncQdrantClient
 from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
 from typing import List, Dict, Any, Optional
-from datetime import datetime
+from datetime import datetime, timezone
 import uuid
 import logging
 import httpx
@@ -54,7 +54,7 @@ class QdrantService:
        point_id = str(uuid.uuid4())
        embedding = await self.get_embedding(content)
        
-        timestamp = datetime.utcnow().isoformat() + "Z"
+        timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
        text = content
        if role == "user":
            text = f"User: {content}"
@@ -85,7 +85,7 @@ class QdrantService:
        """Store a complete Q&A turn as one document."""
        await self._ensure_collection()
        
-        timestamp = datetime.utcnow().isoformat() + "Z"
+        timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
        text = f"User: {user_question}\nAssistant: {assistant_answer}\nTimestamp: {timestamp}"
        
        point_id = str(uuid.uuid4())
--- a/app/singleton.py
+++ b/app/singleton.py
@@ -1,8 +1,9 @@
 """Global singleton instances for Vera-AI."""
+from typing import Optional
 from .qdrant_service import QdrantService
 from .config import config

-_qdrant_service: QdrantService = None
+_qdrant_service: Optional[QdrantService] = None


 def get_qdrant_service() -> QdrantService:
--- a/app/utils.py
+++ b/app/utils.py
@@ -1,9 +1,10 @@
 """Utility functions for vera-ai."""
 from .config import config
+from .singleton import get_qdrant_service
 import tiktoken
 import os
 from typing import List, Dict, Optional
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path

 # Use cl100k_base encoding (GPT-4 compatible)
@@ -13,24 +14,6 @@ ENCODING = tiktoken.get_encoding("cl100k_base")
 PROMPTS_DIR = Path(os.environ.get("VERA_PROMPTS_DIR", "/app/prompts"))
 STATIC_DIR = Path(os.environ.get("VERA_STATIC_DIR", "/app/static"))

-# Global qdrant_service instance for utils
-_qdrant_service = None
-
-def get_qdrant_service():
-    """Get or create the QdrantService singleton."""
-    global _qdrant_service
-    if _qdrant_service is None:
-        from .config import config
-        from .qdrant_service import QdrantService
-        _qdrant_service = QdrantService(
-            host=config.qdrant_host,
-            collection=config.qdrant_collection,
-            embedding_model=config.embedding_model,
-            vector_size=config.vector_size,
-            ollama_host=config.ollama_host
-        )
-    return _qdrant_service
-
 def count_tokens(text: str) -> int:
    """Count tokens in text."""
    if not text:
@@ -56,7 +39,7 @@ def truncate_by_tokens(text: str, max_tokens: int) -> str:

 def filter_memories_by_time(memories: List[Dict], hours: int = 24) -> List[Dict]:
    """Filter memories from the last N hours."""
-    cutoff = datetime.utcnow() - timedelta(hours=hours)
+    cutoff = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=hours)
    filtered = []
    for mem in memories:
        ts = mem.get("timestamp")
@@ -64,7 +47,7 @@ def filter_memories_by_time(memories: List[Dict], hours: int = 24) -> List[Dict]
            try:
                # Parse ISO timestamp
                if isinstance(ts, str):
-                    mem_time = datetime.fromisoformat(ts.replace("Z", "+00:00").replace("+00:00", ""))
+                    mem_time = datetime.fromisoformat(ts.replace("Z", "")).replace(tzinfo=None)
                else:
                    mem_time = ts
                if mem_time > cutoff:
@@ -100,15 +83,6 @@ def merge_memories(memories: List[Dict]) -> Dict:
        "ids": ids
    }

-def calculate_token_budget(total_budget: int, system_ratio: float = 0.2, 
-                           semantic_ratio: float = 0.5, context_ratio: float = 0.3) -> Dict[int, int]:
-    """Calculate token budgets for each layer."""
-    return {
-        "system": int(total_budget * system_ratio),
-        "semantic": int(total_budget * semantic_ratio),
-        "context": int(total_budget * context_ratio)
-    }
-
 def load_system_prompt() -> str:
    """Load system prompt from prompts directory."""
    import logging
@@ -219,15 +193,22 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
    }
    
    # === LAYER 1: System Prompt ===
-    system_content = ""
+    # Caller's system message passes through; systemprompt.md appends if non-empty.
+    caller_system = ""
    for msg in incoming_messages:
        if msg.get("role") == "system":
-            system_content = msg.get("content", "")
+            caller_system = msg.get("content", "")
            break
-    
-    if system_prompt:
-        system_content += "\n\n" + system_prompt
-    
+
+    if caller_system and system_prompt:
+        system_content = caller_system + "\n\n" + system_prompt
+    elif caller_system:
+        system_content = caller_system
+    elif system_prompt:
+        system_content = system_prompt
+    else:
+        system_content = ""
+
    if system_content:
        messages.append({"role": "system", "content": system_content})
        logger.info(f"Layer 1 (system): {count_tokens(system_content)} tokens")
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,11 @@
-fastapi>=0.104.0
-uvicorn[standard]>=0.24.0
-httpx>=0.25.0
-qdrant-client>=1.6.0
-ollama>=0.1.0
-toml>=0.10.2
-tiktoken>=0.5.0
-apscheduler>=3.10.0
-pytest>=7.0.0
-pytest-asyncio>=0.21.0
-pytest-cov>=4.0.0
+fastapi==0.135.2
+uvicorn[standard]==0.42.0
+httpx==0.28.1
+qdrant-client==1.17.1
+ollama==0.6.1
+tiktoken==0.12.0
+apscheduler==3.11.2
+portalocker==3.2.0
+pytest==9.0.2
+pytest-asyncio==1.3.0
+pytest-cov==7.1.0
--- a/tests/test_curator.py
+++ b/tests/test_curator.py
@@ -2,7 +2,7 @@
 import pytest
 import json
 import os
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from unittest.mock import MagicMock, patch

@@ -77,14 +77,14 @@ class TestIsRecent:
    def test_memory_within_window(self):
        """Memory timestamped 1 hour ago is recent (within 24h)."""
        curator, _ = make_curator()
-        ts = (datetime.utcnow() - timedelta(hours=1)).isoformat() + "Z"
+        ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat() + "Z"
        memory = {"timestamp": ts}
        assert curator._is_recent(memory, hours=24) is True

    def test_memory_outside_window(self):
        """Memory timestamped 48 hours ago is not recent."""
        curator, _ = make_curator()
-        ts = (datetime.utcnow() - timedelta(hours=48)).isoformat() + "Z"
+        ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat() + "Z"
        memory = {"timestamp": ts}
        assert curator._is_recent(memory, hours=24) is False

@@ -109,7 +109,7 @@ class TestIsRecent:
    def test_boundary_edge_just_inside(self):
        """Memory at exactly hours-1 minutes ago should be recent."""
        curator, _ = make_curator()
-        ts = (datetime.utcnow() - timedelta(hours=23, minutes=59)).isoformat() + "Z"
+        ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=23, minutes=59)).isoformat() + "Z"
        memory = {"timestamp": ts}
        assert curator._is_recent(memory, hours=24) is True

--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,7 @@
 """Tests for utility functions."""
 import pytest
-from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn
+from unittest.mock import AsyncMock, MagicMock, patch
+from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn, build_augmented_messages


 class TestCountTokens:
@@ -90,20 +91,20 @@ class TestFilterMemoriesByTime:

    def test_includes_recent_memory(self):
        """Memory with timestamp in the last 24h should be included."""
-        from datetime import datetime, timedelta
+        from datetime import datetime, timedelta, timezone
        from app.utils import filter_memories_by_time

-        ts = (datetime.utcnow() - timedelta(hours=1)).isoformat()
+        ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat()
        memories = [{"timestamp": ts, "text": "recent"}]
        result = filter_memories_by_time(memories, hours=24)
        assert len(result) == 1

    def test_excludes_old_memory(self):
        """Memory older than cutoff should be excluded."""
-        from datetime import datetime, timedelta
+        from datetime import datetime, timedelta, timezone
        from app.utils import filter_memories_by_time

-        ts = (datetime.utcnow() - timedelta(hours=48)).isoformat()
+        ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat()
        memories = [{"timestamp": ts, "text": "old"}]
        result = filter_memories_by_time(memories, hours=24)
        assert len(result) == 0
@@ -124,6 +125,16 @@ class TestFilterMemoriesByTime:
        result = filter_memories_by_time(memories, hours=24)
        assert len(result) == 1

+    def test_z_suffix_old_timestamp_excluded(self):
+        """Z-suffixed timestamp older than the cutoff is excluded (covers the Z-stripping parse path)."""
+        from datetime import datetime, timedelta, timezone
+        from app.utils import filter_memories_by_time
+
+        old_ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)).isoformat() + "Z"
+        memories = [{"timestamp": old_ts, "text": "old with Z"}]
+        result = filter_memories_by_time(memories, hours=24)
+        assert len(result) == 0, f"Old Z-suffixed timestamp should be excluded but wasn't: {old_ts}"
+
    def test_empty_list(self):
        """Empty input returns empty list."""
        from app.utils import filter_memories_by_time
@@ -132,10 +143,10 @@ class TestFilterMemoriesByTime:

    def test_z_suffix_timestamp(self):
        """ISO timestamp with Z suffix should be handled correctly."""
-        from datetime import datetime, timedelta
+        from datetime import datetime, timedelta, timezone
        from app.utils import filter_memories_by_time

-        ts = (datetime.utcnow() - timedelta(hours=1)).isoformat() + "Z"
+        ts = (datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=1)).isoformat() + "Z"
        memories = [{"timestamp": ts, "text": "recent with Z"}]
        result = filter_memories_by_time(memories, hours=24)
        assert len(result) == 1
@@ -190,37 +201,6 @@ class TestMergeMemories:
        assert len(result["ids"]) == 2


-class TestCalculateTokenBudget:
-    """Tests for calculate_token_budget function."""
-
-    def test_default_ratios_sum(self):
-        """Default ratios should sum to 1.0 (system+semantic+context)."""
-        from app.utils import calculate_token_budget
-
-        result = calculate_token_budget(1000)
-        assert result["system"] + result["semantic"] + result["context"] == 1000
-
-    def test_custom_ratios(self):
-        """Custom ratios should produce correct proportional budgets."""
-        from app.utils import calculate_token_budget
-
-        result = calculate_token_budget(
-            100, system_ratio=0.1, semantic_ratio=0.6, context_ratio=0.3
-        )
-        assert result["system"] == 10
-        assert result["semantic"] == 60
-        assert result["context"] == 30
-
-    def test_zero_budget(self):
-        """Zero total budget yields all zeros."""
-        from app.utils import calculate_token_budget
-
-        result = calculate_token_budget(0)
-        assert result["system"] == 0
-        assert result["semantic"] == 0
-        assert result["context"] == 0
-
-
 class TestBuildAugmentedMessages:
    """Tests for build_augmented_messages function (mocked I/O)."""

@@ -316,4 +296,72 @@ class TestBuildAugmentedMessages:
            )

        contents = [m["content"] for m in result]
-        assert any("Old question" in c or "Old answer" in c for c in contents)
+        assert any("Old question" in c or "Old answer" in c for c in contents)
+
+    @pytest.mark.asyncio
+    async def test_system_prompt_appends_to_caller_system(self):
+        """systemprompt.md content appends to caller's system message."""
+        import app.utils as utils_module
+
+        mock_qdrant = self._make_qdrant_mock()
+
+        with patch.object(utils_module, "load_system_prompt", return_value="Vera memory context"), \
+             patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
+            incoming = [
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": "Hello"}
+            ]
+            result = await build_augmented_messages(incoming)
+
+            system_msg = result[0]
+            assert system_msg["role"] == "system"
+            assert system_msg["content"] == "You are a helpful assistant.\n\nVera memory context"
+
+    @pytest.mark.asyncio
+    async def test_empty_system_prompt_passthrough(self):
+        """When systemprompt.md is empty, only caller's system message passes through."""
+        import app.utils as utils_module
+
+        mock_qdrant = self._make_qdrant_mock()
+
+        with patch.object(utils_module, "load_system_prompt", return_value=""), \
+             patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
+            incoming = [
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": "Hello"}
+            ]
+            result = await build_augmented_messages(incoming)
+
+            system_msg = result[0]
+            assert system_msg["content"] == "You are a helpful assistant."
+
+    @pytest.mark.asyncio
+    async def test_no_caller_system_with_vera_prompt(self):
+        """When caller sends no system message but systemprompt.md exists, use vera prompt."""
+        import app.utils as utils_module
+
+        mock_qdrant = self._make_qdrant_mock()
+
+        with patch.object(utils_module, "load_system_prompt", return_value="Vera memory context"), \
+             patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
+            incoming = [{"role": "user", "content": "Hello"}]
+            result = await build_augmented_messages(incoming)
+
+            system_msg = result[0]
+            assert system_msg["role"] == "system"
+            assert system_msg["content"] == "Vera memory context"
+
+    @pytest.mark.asyncio
+    async def test_no_system_anywhere(self):
+        """When neither caller nor systemprompt.md provides system content, no system message."""
+        import app.utils as utils_module
+
+        mock_qdrant = self._make_qdrant_mock()
+
+        with patch.object(utils_module, "load_system_prompt", return_value=""), \
+             patch.object(utils_module, "get_qdrant_service", return_value=mock_qdrant):
+            incoming = [{"role": "user", "content": "Hello"}]
+            result = await build_augmented_messages(incoming)
+
+            # First message should be user, not system
+            assert result[0]["role"] == "user"
Author	SHA1	Message	Date
Claude Code	cbe12f0ebd	chore: remove dead calculate_token_budget, fix hardcoded timestamp Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-01 16:13:40 -05:00
Claude Code	9fa5d08ce0	refactor: consolidate duplicate QdrantService singleton into singleton.py Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-01 16:12:21 -05:00
Claude Code	90dd87edeb	fix: system prompt appends to caller's system message, empty = passthrough Handle all 4 combinations of caller system message and systemprompt.md correctly: append when both exist, passthrough when only one exists, omit when neither exists. Fixes leading \n\n when no caller system msg. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-01 16:10:24 -05:00
Claude Code	2801a63b11	fix: correct timestamp parsing bug - chained .replace() was stripping timezone The chained .replace("Z", "+00:00").replace("+00:00", "") calls were undoing each other, causing Z-suffixed timestamps to lose timezone info. Now strips "Z" directly and ensures naive datetime for cutoff comparison. Added regression test for old Z-suffixed timestamps. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-01 16:07:34 -05:00
Claude Code	355986a59f	fix: replace deprecated datetime.utcnow() with timezone-aware alternative Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-01 16:06:00 -05:00
Claude Code	600f9deec1	chore: pin deps to production versions, replace toml with tomllib Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-01 16:03:16 -05:00