2 Commits

Author SHA1 Message Date
Vera-AI
34304a79e0 v2.0.2: Production release with role parsing fix and threshold correction 2026-03-27 13:42:22 -05:00
Vera-AI
c78b3f2bb6 fix: parse curated turns into proper user/assistant roles
- Added parse_curated_turn() function to correctly parse stored memories
- Fixed build_augmented_messages() to use proper message roles
- Layer 2 (semantic) and Layer 3 (context) now correctly parse
  User: X / Assistant: Y format into separate messages
- Resolves context corruption where turns were dumped as single user message

v2.0.2
2026-03-27 13:19:08 -05:00
4 changed files with 135 additions and 55 deletions

View File

@@ -4,15 +4,6 @@
# Build arguments: # Build arguments:
# APP_UID: User ID for appuser (default: 999) # APP_UID: User ID for appuser (default: 999)
# APP_GID: Group ID for appgroup (default: 999) # APP_GID: Group ID for appgroup (default: 999)
#
# Build example:
# docker build --build-arg APP_UID=1000 --build-arg APP_GID=1000 -t vera-ai .
#
# Runtime environment variables:
# TZ: Timezone (default: UTC)
# APP_UID: User ID (informational)
# APP_GID: Group ID (informational)
# VERA_LOG_DIR: Debug log directory (default: /app/logs)
# Stage 1: Builder # Stage 1: Builder
FROM python:3.11-slim AS builder FROM python:3.11-slim AS builder
@@ -20,9 +11,7 @@ FROM python:3.11-slim AS builder
WORKDIR /app WORKDIR /app
# Install build dependencies # Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends build-essential && rm -rf /var/lib/apt/lists/*
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install # Copy requirements and install
COPY requirements.txt . COPY requirements.txt .
@@ -38,29 +27,25 @@ ARG APP_UID=999
ARG APP_GID=999 ARG APP_GID=999
# Create group and user with specified UID/GID # Create group and user with specified UID/GID
RUN groupadd -g ${APP_GID} appgroup && \ RUN groupadd -g ${APP_GID} appgroup && useradd -u ${APP_UID} -g appgroup -r -m -s /bin/bash appuser
useradd -u ${APP_UID} -g appgroup -r -m -s /bin/bash appuser
# Copy installed packages from builder # Copy installed packages from builder
COPY --from=builder /root/.local /home/appuser/.local COPY --from=builder /root/.local /home/appuser/.local
ENV PATH=/home/appuser/.local/bin:$PATH ENV PATH=/home/appuser/.local/bin:$PATH
# Create directories for mounted volumes # Create directories for mounted volumes
RUN mkdir -p /app/config /app/prompts /app/static /app/logs && \ RUN mkdir -p /app/config /app/prompts /app/logs && chown -R ${APP_UID}:${APP_GID} /app
chown -R ${APP_UID}:${APP_GID} /app
# Copy application code # Copy application code
COPY app/ ./app/ COPY app/ ./app/
# Copy default config and prompts (can be overridden by volume mounts) # Copy default config and prompts (can be overridden by volume mounts)
COPY config.toml /app/config/config.toml COPY config/config.toml /app/config/config.toml
COPY static/curator_prompt.md /app/prompts/curator_prompt.md COPY prompts/curator_prompt.md /app/prompts/curator_prompt.md
COPY static/systemprompt.md /app/prompts/systemprompt.md COPY prompts/systemprompt.md /app/prompts/systemprompt.md
# Create symlinks for backward compatibility # Create symlink for config backward compatibility
RUN ln -sf /app/config/config.toml /app/config.toml && \ RUN ln -sf /app/config/config.toml /app/config.toml
ln -sf /app/prompts/curator_prompt.md /app/static/curator_prompt.md && \
ln -sf /app/prompts/systemprompt.md /app/static/systemprompt.md
# Set ownership # Set ownership
RUN chown -R ${APP_UID}:${APP_GID} /app && chmod -R u+rw /app RUN chown -R ${APP_UID}:${APP_GID} /app && chmod -R u+rw /app
@@ -70,11 +55,10 @@ ENV TZ=UTC
EXPOSE 11434 EXPOSE 11434
# Health check using Python (no curl needed in slim image) # Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:11434/')" || exit 1
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:11434/')" || exit 1
# Switch to non-root user # Switch to non-root user
USER appuser USER appuser
CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "11434"]" ENTRYPOINT ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "11434"]

View File

@@ -2,7 +2,7 @@
from .config import config from .config import config
import tiktoken import tiktoken
import os import os
from typing import List, Dict from typing import List, Dict, Optional
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
@@ -127,10 +127,70 @@ def load_system_prompt() -> str:
return "" return ""
def parse_curated_turn(text: str) -> List[Dict]:
    """Parse a curated turn into alternating user/assistant messages.

    Input format:
        User: [question]
        Assistant: [answer]
        Timestamp: ISO datetime

    Lines that do not start a new role are treated as continuations of the
    current message; "Timestamp:" lines are metadata and are discarded.

    Args:
        text: Stored memory text in the "User:/Assistant:" format.

    Returns:
        List of message dicts with "role" and "content" keys.
        Empty list if text is empty or contains no role-prefixed lines.
    """
    if not text:
        return []

    # Line prefix -> message role it introduces (checked in this order,
    # matching the original User-before-Assistant precedence).
    role_prefixes = (("User:", "user"), ("Assistant:", "assistant"))

    messages: List[Dict] = []
    current_role = None              # role of the message being accumulated
    current_content: List[str] = []  # its lines, joined with "\n" on flush

    def _flush() -> None:
        # Emit the accumulated message, if any. An empty content *list*
        # is skipped, but a single empty string (bare "User:" line) is
        # kept — same behavior as the original inline checks.
        if current_role and current_content:
            messages.append({
                "role": current_role,
                "content": "\n".join(current_content).strip(),
            })

    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()
        if line.startswith("Timestamp:"):
            # Metadata line — never part of the conversation.
            continue
        for prefix, role in role_prefixes:
            if line.startswith(prefix):
                _flush()
                current_role = role
                # removeprefix replaces the magic slice indices
                # (line[5:] / line[10:]) from the original.
                current_content = [line.removeprefix(prefix).strip()]
                break
        else:
            if current_role:
                # Continuation of the current message.
                current_content.append(line)

    _flush()  # emit the trailing message
    return messages
async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]: async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
"""Build 4-layer augmented messages from incoming messages. """Build 4-layer augmented messages from incoming messages.
This is a standalone version that can be used by proxy_handler.py. Layer 1: System prompt (preserved from incoming + vera context)
Layer 2: Semantic memories (curated, parsed into proper roles)
Layer 3: Recent context (raw turns, parsed into proper roles)
Layer 4: Current conversation (passed through)
""" """
import logging import logging
@@ -153,6 +213,10 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
search_context += msg.get("content", "") + " " search_context += msg.get("content", "") + " "
messages = [] messages = []
token_budget = {
"semantic": config.semantic_token_budget,
"context": config.context_token_budget
}
# === LAYER 1: System Prompt === # === LAYER 1: System Prompt ===
system_content = "" system_content = ""
@@ -166,6 +230,7 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
if system_content: if system_content:
messages.append({"role": "system", "content": system_content}) messages.append({"role": "system", "content": system_content})
logger.info(f"Layer 1 (system): {count_tokens(system_content)} tokens")
# === LAYER 2: Semantic (curated memories) === # === LAYER 2: Semantic (curated memories) ===
qdrant = get_qdrant_service() qdrant = get_qdrant_service()
@@ -176,28 +241,71 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
entry_type="curated" entry_type="curated"
) )
semantic_tokens = 0 semantic_messages = []
semantic_tokens_used = 0
for result in semantic_results: for result in semantic_results:
payload = result.get("payload", {}) payload = result.get("payload", {})
text = payload.get("text", "") text = payload.get("text", "")
if text and semantic_tokens < config.semantic_token_budget: if text:
messages.append({"role": "user", "content": text}) # Add as context # Parse curated turn into proper user/assistant messages
semantic_tokens += count_tokens(text) parsed = parse_curated_turn(text)
for msg in parsed:
msg_tokens = count_tokens(msg.get("content", ""))
if semantic_tokens_used + msg_tokens <= token_budget["semantic"]:
semantic_messages.append(msg)
semantic_tokens_used += msg_tokens
else:
break
if semantic_tokens_used >= token_budget["semantic"]:
break
# Add parsed messages to context
for msg in semantic_messages:
messages.append(msg)
if semantic_messages:
logger.info(f"Layer 2 (semantic): {len(semantic_messages)} messages, ~{semantic_tokens_used} tokens")
# === LAYER 3: Context (recent turns) === # === LAYER 3: Context (recent turns) ===
recent_turns = await qdrant.get_recent_turns(limit=20) recent_turns = await qdrant.get_recent_turns(limit=50)
context_tokens = 0 context_messages = []
context_tokens_used = 0
# Process oldest first for chronological order
for turn in reversed(recent_turns): for turn in reversed(recent_turns):
payload = turn.get("payload", {}) payload = turn.get("payload", {})
text = payload.get("text", "") text = payload.get("text", "")
if text and context_tokens < config.context_token_budget: entry_type = payload.get("type", "raw")
messages.append({"role": "user", "content": text}) # Add as context
context_tokens += count_tokens(text)
# === LAYER 4: Current messages (passed through) === if text:
# Parse turn into messages
parsed = parse_curated_turn(text)
for msg in parsed:
msg_tokens = count_tokens(msg.get("content", ""))
if context_tokens_used + msg_tokens <= token_budget["context"]:
context_messages.append(msg)
context_tokens_used += msg_tokens
else:
break
if context_tokens_used >= token_budget["context"]:
break
# Add context messages (oldest first maintains conversation order)
for msg in context_messages:
messages.append(msg)
if context_messages:
logger.info(f"Layer 3 (context): {len(context_messages)} messages, ~{context_tokens_used} tokens")
# === LAYER 4: Current conversation ===
for msg in incoming_messages: for msg in incoming_messages:
if msg.get("role") != "system": # Do not duplicate system if msg.get("role") != "system": # System already handled in Layer 1
messages.append(msg) messages.append(msg)
logger.info(f"Layer 4 (current): {len([m for m in incoming_messages if m.get('role') != 'system'])} messages")
return messages return messages

View File

@@ -2,18 +2,15 @@
ollama_host = "http://10.0.0.10:11434" ollama_host = "http://10.0.0.10:11434"
qdrant_host = "http://10.0.0.22:6333" qdrant_host = "http://10.0.0.22:6333"
qdrant_collection = "memories" qdrant_collection = "memories"
embedding_model = "snowflake-arctic-embed2" embedding_model = "mxbai-embed-large"
debug = false debug = false
[layers] [layers]
# Note: system_token_budget removed - system prompt is never truncated
semantic_token_budget = 25000 semantic_token_budget = 25000
context_token_budget = 22000 context_token_budget = 22000
semantic_search_turns = 2 semantic_search_turns = 2
semantic_score_threshold = 0.6 semantic_score_threshold = 0.3
[curator] [curator]
# Daily curation: processes recent 24h of raw memories
# Monthly mode is detected automatically by curator_prompt.md (day 01)
run_time = "02:00" run_time = "02:00"
curator_model = "gpt-oss:120b" curator_model = "gpt-oss:120b"

View File

@@ -1,10 +1 @@
You have persistent memory across all conversations with this user.
**Important:** The latter portion of your conversation context contains memories retrieved from a vector database. These are curated summaries of past conversations, not live chat history.
Use these memories to:
- Reference previous decisions and preferences
- Draw on relevant past discussions
- Provide personalized, context-aware responses
If memories seem outdated or conflicting, ask for clarification.