tag/v2.0.1/app/context_handler.py

"""Context handler - builds 4-layer context for every request."""
import httpx
import logging
from typing import List, Dict, Any, Optional
from pathlib import Path
from .config import Config
from .qdrant_service import QdrantService
from .utils import count_tokens, truncate_by_tokens

logger = logging.getLogger(__name__)


class ContextHandler:
    def __init__(self, config: Config):
        """Set up the handler from *config*.

        Keeps a reference to the config, creates the QdrantService from the
        Qdrant/embedding/Ollama settings, and loads the system prompt file.
        A missing prompt file propagates as FileNotFoundError from
        ``_load_system_prompt``.
        """
        self.config = config

        # Gather the service settings first so the constructor call stays short.
        qdrant_settings = {
            "host": config.qdrant_host,
            "collection": config.qdrant_collection,
            "embedding_model": config.embedding_model,
            "ollama_host": config.ollama_host,
        }
        self.qdrant = QdrantService(**qdrant_settings)

        self.system_prompt = self._load_system_prompt()
    
    def _load_system_prompt(self) -> str:
        """Load the system prompt from static/systemprompt.md.

        The file is resolved relative to this module as
        ``<directory of this file>/../static/systemprompt.md`` and decoded
        explicitly as UTF-8, so the result does not depend on the platform's
        default locale encoding.

        Returns:
            The file contents with leading and trailing whitespace removed.

        Raises:
            FileNotFoundError: If the prompt file does not exist. The error
                is logged and re-raised because the file is required.
        """
        path = Path(__file__).parent.parent / "static" / "systemprompt.md"
        try:
            # Only the read can raise FileNotFoundError; building the path cannot.
            return path.read_text(encoding="utf-8").strip()
        except FileNotFoundError:
            logger.error("systemprompt.md not found - required file")
            raise
    
    async def process(self, messages: List[Dict], model: str, stream: bool = False) -> Dict:
        """Process a chat request through the 4-layer context.

        Steps:
          1. Take the content of the last ``user`` message as the question.
          2. Build a search query from the user/assistant messages among the
             last ``config.semantic_search_turns`` incoming messages.
          3. Assemble the layered message list via ``build_context_messages``.
          4. POST it to ``{config.ollama_host}/api/chat``.
          5. Store the question/answer pair in Qdrant when an answer came back.

        Args:
            messages: Incoming chat messages (dicts with ``role``/``content``).
            model: Model name forwarded to the chat endpoint.
            stream: Forwarded as the ``stream`` field of the request body.

        Returns:
            The decoded JSON body of the chat endpoint's response.
        """
        # Get user question (last user message); tolerate a null content field.
        user_question = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                user_question = msg.get("content") or ""
                break

        # Get messages for semantic search (last N turns).
        # A window of 0 must select nothing: messages[-0:] is the WHOLE list.
        turns = self.config.semantic_search_turns
        window = messages[-turns:] if turns > 0 else []
        search_messages = []
        for msg in window:
            if msg.get("role") not in ("user", "assistant"):
                continue
            content = msg.get("content")
            # Skip null / non-text content, which str.join cannot handle.
            if isinstance(content, str):
                search_messages.append(content)

        # Build the 4-layer context messages
        context_messages = await self.build_context_messages(
            incoming_system=next((m for m in messages if m.get("role") == "system"), None),
            user_question=user_question,
            search_context=" ".join(search_messages)
        )

        # Forward to Ollama
        # NOTE(review): the body is read in full and decoded as ONE JSON
        # document. With stream=True the upstream is expected to send several
        # newline-delimited JSON chunks, which response.json() cannot decode -
        # confirm callers only use stream=False on this path.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{self.config.ollama_host}/api/chat",
                json={"model": model, "messages": context_messages, "stream": stream}
            )
            result = response.json()

        if response.status_code >= 400:
            logger.warning(f"Chat endpoint returned HTTP {response.status_code}")

        # Store the Q&A turn in Qdrant - but never an empty answer, which
        # would only pollute the memory used by later requests.
        assistant_msg = (result.get("message") or {}).get("content", "")
        if assistant_msg:
            await self.qdrant.store_qa_turn(user_question, assistant_msg)
        else:
            logger.warning("No assistant content in chat response - turn not stored")

        return result
    
    def _parse_curated_turn(self, text: str) -> List[Dict]:
        """Parse a curated turn into alternating user/assistant messages.
        
        Input format:
            User: [question]
            Assistant: [answer]
            Timestamp: ISO datetime
        
        Returns list of message dicts with role and content.
        """
        messages = []
        lines = text.strip().split("\n")
        
        current_role = None
        current_content = []
        
        for line in lines:
            line = line.strip()
            if line.startswith("User:"):
                # Save previous content if exists
                if current_role and current_content:
                    messages.append({
                        "role": current_role,
                        "content": "\n".join(current_content).strip()
                    })
                current_role = "user"
                current_content = [line[5:].strip()]  # Remove "User:" prefix
            elif line.startswith("Assistant:"):
                # Save previous content if exists
                if current_role and current_content:
                    messages.append({
                        "role": current_role,
                        "content": "\n".join(current_content).strip()
                    })
                current_role = "assistant"
                current_content = [line[10:].strip()]  # Remove "Assistant:" prefix
            elif line.startswith("Timestamp:"):
                # Ignore timestamp line
                continue
            elif current_role:
                # Continuation of current message
                current_content.append(line)
        
        # Save last message
        if current_role and current_content:
            messages.append({
                "role": current_role,
                "content": "\n".join(current_content).strip()
            })
        
        return messages
    
    async def build_context_messages(self, incoming_system: Optional[Dict], user_question: str, search_context: str) -> List[Dict]:
        """Build the 4-layer context messages array.

        Layers, in output order:
          1. One system message: the incoming system content (if any)
             followed by the loaded system prompt, joined by a blank line.
          2. Semantic layer: curated entries from the Qdrant semantic search,
             capped by ``config.semantic_token_budget``.
          3. Context layer: recent turns from Qdrant in chronological order,
             capped by ``config.context_token_budget``.
          4. The current user question.

        Stored turns are added whole or not at all, so a question is never
        emitted without its answer.

        Args:
            incoming_system: The request's system message dict, or None.
            user_question: Content of the current user message.
            search_context: Query text for the semantic search; when empty,
                ``user_question`` is used instead.

        Returns:
            The assembled list of ``{"role", "content"}`` message dicts.
        """
        # === LAYER 1: System Prompt (pass through unchanged) ===
        # DO NOT truncate - preserve system prompt entirely
        system_parts = []
        if incoming_system:
            # "or ''" guards against an explicit null content field.
            system_content = incoming_system.get("content") or ""
            logger.info(f"System layer: preserved incoming system {len(system_content)} chars, {count_tokens(system_content)} tokens")
            if system_content:
                system_parts.append(system_content)

        # Add Vera context info if present (small, just metadata)
        if self.system_prompt.strip():
            system_parts.append(self.system_prompt)
            logger.info(f"System layer: added vera context {len(self.system_prompt)} chars")

        # Joining the parts avoids a leading blank line when there is no
        # incoming system content. The system message is always emitted.
        messages = [{"role": "system", "content": "\n\n".join(system_parts)}]

        # === LAYER 2: Semantic Layer (curated memories) ===
        # Search for curated blocks only
        semantic_results = await self.qdrant.semantic_search(
            query=search_context or user_question,
            limit=20,
            score_threshold=self.config.semantic_score_threshold,
            entry_type="curated"
        )

        # An entry that does not fit is skipped; later (smaller) entries may
        # still fit, matching the previous fill-what-fits behaviour.
        semantic_turns, semantic_tokens_used = self._select_turns_within_budget(
            semantic_results,
            self.config.semantic_token_budget,
            stop_on_overflow=False,
        )
        semantic_messages = [msg for turn in semantic_turns for msg in turn]
        messages.extend(semantic_messages)

        if semantic_messages:
            logger.info(f"Semantic layer: {len(semantic_messages)} messages, ~{semantic_tokens_used} tokens")

        # === LAYER 3: Context Layer (recent turns) ===
        recent_turns = await self.qdrant.get_recent_turns(limit=50)

        # NOTE(review): assumes get_recent_turns returns newest first (the
        # previous code reversed it and labelled the result "oldest first") -
        # confirm against QdrantService.
        # The budget is spent starting from the newest turn and selection
        # stops at the first turn that does not fit, so what is kept is a
        # contiguous run of history ending at the most recent turn.
        kept_turns, context_tokens_used = self._select_turns_within_budget(
            recent_turns,
            self.config.context_token_budget,
            stop_on_overflow=True,
        )
        # Emit oldest first, as before.
        context_messages_parsed = [msg for turn in reversed(kept_turns) for msg in turn]
        messages.extend(context_messages_parsed)

        if context_messages_parsed:
            logger.info(f"Context layer: {len(context_messages_parsed)} messages, ~{context_tokens_used} tokens")

        # === LAYER 4: Current Question ===
        messages.append({"role": "user", "content": user_question})

        return messages

    def _select_turns_within_budget(self, entries, budget, *, stop_on_overflow):
        """Pick whole stored turns, in the given order, that fit a token budget.

        Each entry's ``payload["text"]`` is parsed with
        ``_parse_curated_turn``; entries without text or without any parsed
        message are ignored. A turn's cost is the sum of ``count_tokens``
        over its message contents.

        Args:
            entries: Iterable of dicts carrying a ``payload`` dict.
            budget: Maximum total tokens for the selected turns (inclusive).
            stop_on_overflow: When True, stop at the first turn that does not
                fit; when False, skip it and keep trying later entries.

        Returns:
            ``(turns, tokens_used)`` where ``turns`` is a list of message
            lists in the same order as ``entries``.
        """
        selected = []
        tokens_used = 0

        for entry in entries:
            text = entry.get("payload", {}).get("text", "")
            if not text:
                continue

            turn = self._parse_curated_turn(text)
            if not turn:
                continue

            cost = sum(count_tokens(msg.get("content", "")) for msg in turn)
            if tokens_used + cost > budget:
                if stop_on_overflow:
                    break
                continue

            selected.append(turn)
            tokens_used += cost

        return selected, tokens_used
Initial commit: Vera-AI v2 with async Qdrant, singleton pattern, monthly curation, and configurable UID/GID/TZ Features: - AsyncQdrantClient for non-blocking Qdrant operations - Singleton pattern for QdrantService - Monthly full curation (day 1 at 03:00) - Configurable UID/GID for Docker - Timezone support via TZ env var - Configurable log directory (VERA_LOG_DIR) - Volume mounts for config/, prompts/, logs/ - Standard Docker format with .env file Fixes: - Removed unused system_token_budget - Added semantic_score_threshold config - Fixed streaming response handling - Python-based healthcheck (no curl dependency) 2026-03-26 12:37:25 -05:00			`"""Context handler - builds 4-layer context for every request."""`
			`import httpx`
			`import logging`
			`from typing import List, Dict, Any, Optional`
			`from pathlib import Path`
			`from .config import Config`
			`from .qdrant_service import QdrantService`
			`from .utils import count_tokens, truncate_by_tokens`

			`logger = logging.getLogger(__name__)`


			`class ContextHandler:`
			`def __init__(self, config: Config):`
			`self.config = config`
			`self.qdrant = QdrantService(`
			`host=config.qdrant_host,`
			`collection=config.qdrant_collection,`
			`embedding_model=config.embedding_model,`
			`ollama_host=config.ollama_host`
			`)`
			`self.system_prompt = self._load_system_prompt()`

			`def _load_system_prompt(self) -> str:`
			`"""Load system prompt from static/systemprompt.md."""`
			`try:`
			`path = Path(__file__).parent.parent / "static" / "systemprompt.md"`
			`return path.read_text().strip()`
			`except FileNotFoundError:`
			`logger.error("systemprompt.md not found - required file")`
			`raise`

			`async def process(self, messages: List[Dict], model: str, stream: bool = False) -> Dict:`
			`"""Process chat request through 4-layer context."""`
			`# Get user question (last user message)`
			`user_question = ""`
			`for msg in reversed(messages):`
			`if msg.get("role") == "user":`
			`user_question = msg.get("content", "")`
			`break`

			`# Get messages for semantic search (last N turns)`
			`search_messages = []`
			`for msg in messages[-self.config.semantic_search_turns:]:`
			`if msg.get("role") in ("user", "assistant"):`
			`search_messages.append(msg.get("content", ""))`

			`# Build the 4-layer context messages`
			`context_messages = await self.build_context_messages(`
			`incoming_system=next((m for m in messages if m.get("role") == "system"), None),`
			`user_question=user_question,`
			`search_context=" ".join(search_messages)`
			`)`

			`# Forward to Ollama`
			`async with httpx.AsyncClient(timeout=120.0) as client:`
			`response = await client.post(`
			`f"{self.config.ollama_host}/api/chat",`
			`json={"model": model, "messages": context_messages, "stream": stream}`
			`)`
			`result = response.json()`

			`# Store the Q&A turn in Qdrant`
			`assistant_msg = result.get("message", {}).get("content", "")`
			`await self.qdrant.store_qa_turn(user_question, assistant_msg)`

			`return result`

			`def _parse_curated_turn(self, text: str) -> List[Dict]:`
			`"""Parse a curated turn into alternating user/assistant messages.`

			`Input format:`
			`User: [question]`
			`Assistant: [answer]`
			`Timestamp: ISO datetime`

			`Returns list of message dicts with role and content.`
			`"""`
			`messages = []`
			`lines = text.strip().split("\n")`

			`current_role = None`
			`current_content = []`

			`for line in lines:`
			`line = line.strip()`
			`if line.startswith("User:"):`
			`# Save previous content if exists`
			`if current_role and current_content:`
			`messages.append({`
			`"role": current_role,`
			`"content": "\n".join(current_content).strip()`
			`})`
			`current_role = "user"`
			`current_content = [line[5:].strip()] # Remove "User:" prefix`
			`elif line.startswith("Assistant:"):`
			`# Save previous content if exists`
			`if current_role and current_content:`
			`messages.append({`
			`"role": current_role,`
			`"content": "\n".join(current_content).strip()`
			`})`
			`current_role = "assistant"`
			`current_content = [line[10:].strip()] # Remove "Assistant:" prefix`
			`elif line.startswith("Timestamp:"):`
			`# Ignore timestamp line`
			`continue`
			`elif current_role:`
			`# Continuation of current message`
			`current_content.append(line)`

			`# Save last message`
			`if current_role and current_content:`
			`messages.append({`
			`"role": current_role,`
			`"content": "\n".join(current_content).strip()`
			`})`

			`return messages`

			`async def build_context_messages(self, incoming_system: Optional[Dict], user_question: str, search_context: str) -> List[Dict]:`
			`"""Build 4-layer context messages array."""`
			`messages = []`
			`token_budget = {`
			`"semantic": self.config.semantic_token_budget,`
			`"context": self.config.context_token_budget`
			`}`

			`# === LAYER 1: System Prompt (pass through unchanged) ===`
Remove OpenClaw references 2026-03-26 13:01:30 -05:00			`# DO NOT truncate - preserve system prompt entirely`
Initial commit: Vera-AI v2 with async Qdrant, singleton pattern, monthly curation, and configurable UID/GID/TZ Features: - AsyncQdrantClient for non-blocking Qdrant operations - Singleton pattern for QdrantService - Monthly full curation (day 1 at 03:00) - Configurable UID/GID for Docker - Timezone support via TZ env var - Configurable log directory (VERA_LOG_DIR) - Volume mounts for config/, prompts/, logs/ - Standard Docker format with .env file Fixes: - Removed unused system_token_budget - Added semantic_score_threshold config - Fixed streaming response handling - Python-based healthcheck (no curl dependency) 2026-03-26 12:37:25 -05:00			`system_content = ""`
			`if incoming_system:`
			`system_content = incoming_system.get("content", "")`
			`logger.info(f"System layer: preserved incoming system {len(system_content)} chars, {count_tokens(system_content)} tokens")`

			`# Add Vera context info if present (small, just metadata)`
			`if self.system_prompt.strip():`
			`system_content += "\n\n" + self.system_prompt`
			`logger.info(f"System layer: added vera context {len(self.system_prompt)} chars")`

			`messages.append({"role": "system", "content": system_content})`

			`# === LAYER 2: Semantic Layer (curated memories) ===`
			`# Search for curated blocks only`
			`semantic_results = await self.qdrant.semantic_search(`
			`query=search_context if search_context else user_question,`
			`limit=20,`
			`score_threshold=self.config.semantic_score_threshold,`
			`entry_type="curated"`
			`)`

			`# Parse curated turns into alternating user/assistant messages`
			`semantic_messages = []`
			`semantic_tokens_used = 0`

			`for result in semantic_results:`
			`payload = result.get("payload", {})`
			`text = payload.get("text", "")`
			`if text:`
			`parsed = self._parse_curated_turn(text)`
			`for msg in parsed:`
			`msg_tokens = count_tokens(msg.get("content", ""))`
			`if semantic_tokens_used + msg_tokens <= token_budget["semantic"]:`
			`semantic_messages.append(msg)`
			`semantic_tokens_used += msg_tokens`
			`else:`
			`break`

			`# Add parsed messages to context`
			`for msg in semantic_messages:`
			`messages.append(msg)`

			`if semantic_messages:`
			`logger.info(f"Semantic layer: {len(semantic_messages)} messages, ~{semantic_tokens_used} tokens")`

			`# === LAYER 3: Context Layer (recent turns) ===`
			`recent_turns = await self.qdrant.get_recent_turns(limit=50)`

			`context_messages_parsed = []`
			`context_tokens_used = 0`

			`for turn in reversed(recent_turns): # Oldest first`
			`payload = turn.get("payload", {})`
			`text = payload.get("text", "")`
			`entry_type = payload.get("type", "raw")`

			`if text:`
			`# Parse turn into messages`
			`parsed = self._parse_curated_turn(text)`

			`for msg in parsed:`
			`msg_tokens = count_tokens(msg.get("content", ""))`
			`if context_tokens_used + msg_tokens <= token_budget["context"]:`
			`context_messages_parsed.append(msg)`
			`context_tokens_used += msg_tokens`
			`else:`
			`break`

			`for msg in context_messages_parsed:`
			`messages.append(msg)`

			`if context_messages_parsed:`
			`logger.info(f"Context layer: {len(context_messages_parsed)} messages, ~{context_tokens_used} tokens")`

			`# === LAYER 4: Current Question ===`
			`messages.append({"role": "user", "content": user_question})`

			`return messages`