2 Commits

Author SHA1 Message Date
Vera-AI
34304a79e0 v2.0.2: Production release with role parsing fix and threshold correction 2026-03-27 13:42:22 -05:00
Vera-AI
c78b3f2bb6 fix: parse curated turns into proper user/assistant roles
- Added parse_curated_turn() function to correctly parse stored memories
- Fixed build_augmented_messages() to use proper message roles
- Layer 2 (semantic) and Layer 3 (context) now correctly parse
  User: X / Assistant: Y format into separate messages
- Resolves context corruption where turns were dumped as a single user message

v2.0.2
2026-03-27 13:19:08 -05:00
4 changed files with 135 additions and 55 deletions

View File

@@ -4,15 +4,6 @@
# Build arguments:
# APP_UID: User ID for appuser (default: 999)
# APP_GID: Group ID for appgroup (default: 999)
#
# Build example:
# docker build --build-arg APP_UID=1000 --build-arg APP_GID=1000 -t vera-ai .
#
# Runtime environment variables:
# TZ: Timezone (default: UTC)
# APP_UID: User ID (informational)
# APP_GID: Group ID (informational)
# VERA_LOG_DIR: Debug log directory (default: /app/logs)
# Stage 1: Builder
FROM python:3.11-slim AS builder
@@ -20,9 +11,7 @@ FROM python:3.11-slim AS builder
WORKDIR /app
# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
&& rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y --no-install-recommends build-essential && rm -rf /var/lib/apt/lists/*
# Copy requirements and install
COPY requirements.txt .
@@ -38,29 +27,25 @@ ARG APP_UID=999
ARG APP_GID=999
# Create group and user with specified UID/GID
RUN groupadd -g ${APP_GID} appgroup && \
useradd -u ${APP_UID} -g appgroup -r -m -s /bin/bash appuser
RUN groupadd -g ${APP_GID} appgroup && useradd -u ${APP_UID} -g appgroup -r -m -s /bin/bash appuser
# Copy installed packages from builder
COPY --from=builder /root/.local /home/appuser/.local
ENV PATH=/home/appuser/.local/bin:$PATH
# Create directories for mounted volumes
RUN mkdir -p /app/config /app/prompts /app/static /app/logs && \
chown -R ${APP_UID}:${APP_GID} /app
RUN mkdir -p /app/config /app/prompts /app/logs && chown -R ${APP_UID}:${APP_GID} /app
# Copy application code
COPY app/ ./app/
# Copy default config and prompts (can be overridden by volume mounts)
COPY config.toml /app/config/config.toml
COPY static/curator_prompt.md /app/prompts/curator_prompt.md
COPY static/systemprompt.md /app/prompts/systemprompt.md
COPY config/config.toml /app/config/config.toml
COPY prompts/curator_prompt.md /app/prompts/curator_prompt.md
COPY prompts/systemprompt.md /app/prompts/systemprompt.md
# Create symlinks for backward compatibility
RUN ln -sf /app/config/config.toml /app/config.toml && \
ln -sf /app/prompts/curator_prompt.md /app/static/curator_prompt.md && \
ln -sf /app/prompts/systemprompt.md /app/static/systemprompt.md
# Create symlink for config backward compatibility
RUN ln -sf /app/config/config.toml /app/config.toml
# Set ownership
RUN chown -R ${APP_UID}:${APP_GID} /app && chmod -R u+rw /app
@@ -70,11 +55,10 @@ ENV TZ=UTC
EXPOSE 11434
# Health check using Python (no curl needed in slim image)
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:11434/')" || exit 1
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:11434/')" || exit 1
# Switch to non-root user
USER appuser
CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "11434"]"
ENTRYPOINT ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "11434"]

View File

@@ -2,7 +2,7 @@
from .config import config
import tiktoken
import os
from typing import List, Dict
from typing import List, Dict, Optional
from datetime import datetime, timedelta
from pathlib import Path
@@ -127,10 +127,70 @@ def load_system_prompt() -> str:
return ""
def parse_curated_turn(text: str) -> List[Dict]:
    """Split a stored turn of the form ``User: .../Assistant: ...`` into chat messages.

    Expected input format::

        User: [question]
        Assistant: [answer]
        Timestamp: ISO datetime

    Lines beginning with ``Timestamp:`` are discarded; any other line is
    treated as a continuation of the message currently being collected.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts, or an empty list
        when *text* is empty or contains no ``User:``/``Assistant:`` markers.
    """
    if not text:
        return []

    parsed: List[Dict] = []
    role = None
    buffer: List[str] = []

    def _flush() -> None:
        # Emit the message accumulated so far, if one is in progress.
        if role and buffer:
            parsed.append({
                "role": role,
                "content": "\n".join(buffer).strip(),
            })

    for raw in text.strip().split("\n"):
        stripped = raw.strip()
        if stripped.startswith("User:"):
            _flush()
            role = "user"
            buffer = [stripped[len("User:"):].strip()]
        elif stripped.startswith("Assistant:"):
            _flush()
            role = "assistant"
            buffer = [stripped[len("Assistant:"):].strip()]
        elif stripped.startswith("Timestamp:"):
            # Metadata line — not part of either message.
            continue
        elif role:
            # Continuation line of the message currently being collected.
            buffer.append(stripped)

    _flush()
    return parsed
async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
"""Build 4-layer augmented messages from incoming messages.
This is a standalone version that can be used by proxy_handler.py.
Layer 1: System prompt (preserved from incoming + vera context)
Layer 2: Semantic memories (curated, parsed into proper roles)
Layer 3: Recent context (raw turns, parsed into proper roles)
Layer 4: Current conversation (passed through)
"""
import logging
@@ -153,6 +213,10 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
search_context += msg.get("content", "") + " "
messages = []
token_budget = {
"semantic": config.semantic_token_budget,
"context": config.context_token_budget
}
# === LAYER 1: System Prompt ===
system_content = ""
@@ -166,6 +230,7 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
if system_content:
messages.append({"role": "system", "content": system_content})
logger.info(f"Layer 1 (system): {count_tokens(system_content)} tokens")
# === LAYER 2: Semantic (curated memories) ===
qdrant = get_qdrant_service()
@@ -176,28 +241,71 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
entry_type="curated"
)
semantic_tokens = 0
semantic_messages = []
semantic_tokens_used = 0
for result in semantic_results:
payload = result.get("payload", {})
text = payload.get("text", "")
if text and semantic_tokens < config.semantic_token_budget:
messages.append({"role": "user", "content": text}) # Add as context
semantic_tokens += count_tokens(text)
if text:
# Parse curated turn into proper user/assistant messages
parsed = parse_curated_turn(text)
for msg in parsed:
msg_tokens = count_tokens(msg.get("content", ""))
if semantic_tokens_used + msg_tokens <= token_budget["semantic"]:
semantic_messages.append(msg)
semantic_tokens_used += msg_tokens
else:
break
if semantic_tokens_used >= token_budget["semantic"]:
break
# Add parsed messages to context
for msg in semantic_messages:
messages.append(msg)
if semantic_messages:
logger.info(f"Layer 2 (semantic): {len(semantic_messages)} messages, ~{semantic_tokens_used} tokens")
# === LAYER 3: Context (recent turns) ===
recent_turns = await qdrant.get_recent_turns(limit=20)
recent_turns = await qdrant.get_recent_turns(limit=50)
context_tokens = 0
context_messages = []
context_tokens_used = 0
# Process oldest first for chronological order
for turn in reversed(recent_turns):
payload = turn.get("payload", {})
text = payload.get("text", "")
if text and context_tokens < config.context_token_budget:
messages.append({"role": "user", "content": text}) # Add as context
context_tokens += count_tokens(text)
entry_type = payload.get("type", "raw")
# === LAYER 4: Current messages (passed through) ===
if text:
# Parse turn into messages
parsed = parse_curated_turn(text)
for msg in parsed:
msg_tokens = count_tokens(msg.get("content", ""))
if context_tokens_used + msg_tokens <= token_budget["context"]:
context_messages.append(msg)
context_tokens_used += msg_tokens
else:
break
if context_tokens_used >= token_budget["context"]:
break
# Add context messages (oldest first maintains conversation order)
for msg in context_messages:
messages.append(msg)
if context_messages:
logger.info(f"Layer 3 (context): {len(context_messages)} messages, ~{context_tokens_used} tokens")
# === LAYER 4: Current conversation ===
for msg in incoming_messages:
if msg.get("role") != "system": # Do not duplicate system
if msg.get("role") != "system": # System already handled in Layer 1
messages.append(msg)
logger.info(f"Layer 4 (current): {len([m for m in incoming_messages if m.get('role') != 'system'])} messages")
return messages

View File

@@ -2,18 +2,15 @@
ollama_host = "http://10.0.0.10:11434"
qdrant_host = "http://10.0.0.22:6333"
qdrant_collection = "memories"
embedding_model = "snowflake-arctic-embed2"
embedding_model = "mxbai-embed-large"
debug = false
[layers]
# Note: system_token_budget removed - system prompt is never truncated
semantic_token_budget = 25000
context_token_budget = 22000
semantic_search_turns = 2
semantic_score_threshold = 0.6
semantic_score_threshold = 0.3
[curator]
# Daily curation: processes recent 24h of raw memories
# Monthly mode is detected automatically by curator_prompt.md (day 01)
run_time = "02:00"
curator_model = "gpt-oss:120b"

View File

@@ -1,10 +1 @@
You have persistent memory across all conversations with this user.
**Important:** The latter portion of your conversation context contains memories retrieved from a vector database. These are curated summaries of past conversations, not live chat history.
Use these memories to:
- Reference previous decisions and preferences
- Draw on relevant past discussions
- Provide personalized, context-aware responses
If memories seem outdated or conflicting, ask for clarification.