Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
34304a79e0 | ||
|
|
c78b3f2bb6 | ||
|
|
50874eeae9 |
@@ -148,10 +148,8 @@ semantic_score_threshold = 0.6
|
|||||||
run_time = "02:00"
|
run_time = "02:00"
|
||||||
|
|
||||||
# Time for monthly full curation (HH:MM format)
|
# Time for monthly full curation (HH:MM format)
|
||||||
full_run_time = "03:00"
|
|
||||||
|
|
||||||
# Day of month for full curation (1-28)
|
# Day of month for full curation (1-28)
|
||||||
full_run_day = 1
|
|
||||||
|
|
||||||
# Model to use for curation
|
# Model to use for curation
|
||||||
curator_model = "gpt-oss:120b"
|
curator_model = "gpt-oss:120b"
|
||||||
@@ -308,7 +306,7 @@ docker run -d --name VeraAI -p 8080:11434 ...
|
|||||||
| Feature | Description |
|
| Feature | Description |
|
||||||
|---------|-------------|
|
|---------|-------------|
|
||||||
| 🧠 **Persistent Memory** | Conversations stored in Qdrant, retrieved contextually |
|
| 🧠 **Persistent Memory** | Conversations stored in Qdrant, retrieved contextually |
|
||||||
| 📅 **Monthly Curation** | Daily + monthly cleanup of raw memories |
|
| 📅 **Monthly Curation** | Daily cleanup, auto-monthly on day 01 |
|
||||||
| 🔍 **4-Layer Context** | System + semantic + recent + current messages |
|
| 🔍 **4-Layer Context** | System + semantic + recent + current messages |
|
||||||
| 👤 **Configurable UID/GID** | Match container user to host for permissions |
|
| 👤 **Configurable UID/GID** | Match container user to host for permissions |
|
||||||
| 🌍 **Timezone Support** | Scheduler runs in your local timezone |
|
| 🌍 **Timezone Support** | Scheduler runs in your local timezone |
|
||||||
|
|||||||
38
Dockerfile
38
Dockerfile
@@ -4,15 +4,6 @@
|
|||||||
# Build arguments:
|
# Build arguments:
|
||||||
# APP_UID: User ID for appuser (default: 999)
|
# APP_UID: User ID for appuser (default: 999)
|
||||||
# APP_GID: Group ID for appgroup (default: 999)
|
# APP_GID: Group ID for appgroup (default: 999)
|
||||||
#
|
|
||||||
# Build example:
|
|
||||||
# docker build --build-arg APP_UID=1000 --build-arg APP_GID=1000 -t vera-ai .
|
|
||||||
#
|
|
||||||
# Runtime environment variables:
|
|
||||||
# TZ: Timezone (default: UTC)
|
|
||||||
# APP_UID: User ID (informational)
|
|
||||||
# APP_GID: Group ID (informational)
|
|
||||||
# VERA_LOG_DIR: Debug log directory (default: /app/logs)
|
|
||||||
|
|
||||||
# Stage 1: Builder
|
# Stage 1: Builder
|
||||||
FROM python:3.11-slim AS builder
|
FROM python:3.11-slim AS builder
|
||||||
@@ -20,9 +11,7 @@ FROM python:3.11-slim AS builder
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install build dependencies
|
# Install build dependencies
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends build-essential && rm -rf /var/lib/apt/lists/*
|
||||||
build-essential \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Copy requirements and install
|
# Copy requirements and install
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
@@ -38,29 +27,25 @@ ARG APP_UID=999
|
|||||||
ARG APP_GID=999
|
ARG APP_GID=999
|
||||||
|
|
||||||
# Create group and user with specified UID/GID
|
# Create group and user with specified UID/GID
|
||||||
RUN groupadd -g ${APP_GID} appgroup && \
|
RUN groupadd -g ${APP_GID} appgroup && useradd -u ${APP_UID} -g appgroup -r -m -s /bin/bash appuser
|
||||||
useradd -u ${APP_UID} -g appgroup -r -m -s /bin/bash appuser
|
|
||||||
|
|
||||||
# Copy installed packages from builder
|
# Copy installed packages from builder
|
||||||
COPY --from=builder /root/.local /home/appuser/.local
|
COPY --from=builder /root/.local /home/appuser/.local
|
||||||
ENV PATH=/home/appuser/.local/bin:$PATH
|
ENV PATH=/home/appuser/.local/bin:$PATH
|
||||||
|
|
||||||
# Create directories for mounted volumes
|
# Create directories for mounted volumes
|
||||||
RUN mkdir -p /app/config /app/prompts /app/static /app/logs && \
|
RUN mkdir -p /app/config /app/prompts /app/logs && chown -R ${APP_UID}:${APP_GID} /app
|
||||||
chown -R ${APP_UID}:${APP_GID} /app
|
|
||||||
|
|
||||||
# Copy application code
|
# Copy application code
|
||||||
COPY app/ ./app/
|
COPY app/ ./app/
|
||||||
|
|
||||||
# Copy default config and prompts (can be overridden by volume mounts)
|
# Copy default config and prompts (can be overridden by volume mounts)
|
||||||
COPY config.toml /app/config/config.toml
|
COPY config/config.toml /app/config/config.toml
|
||||||
COPY static/curator_prompt.md /app/prompts/curator_prompt.md
|
COPY prompts/curator_prompt.md /app/prompts/curator_prompt.md
|
||||||
COPY static/systemprompt.md /app/prompts/systemprompt.md
|
COPY prompts/systemprompt.md /app/prompts/systemprompt.md
|
||||||
|
|
||||||
# Create symlinks for backward compatibility
|
# Create symlink for config backward compatibility
|
||||||
RUN ln -sf /app/config/config.toml /app/config.toml && \
|
RUN ln -sf /app/config/config.toml /app/config.toml
|
||||||
ln -sf /app/prompts/curator_prompt.md /app/static/curator_prompt.md && \
|
|
||||||
ln -sf /app/prompts/systemprompt.md /app/static/systemprompt.md
|
|
||||||
|
|
||||||
# Set ownership
|
# Set ownership
|
||||||
RUN chown -R ${APP_UID}:${APP_GID} /app && chmod -R u+rw /app
|
RUN chown -R ${APP_UID}:${APP_GID} /app && chmod -R u+rw /app
|
||||||
@@ -70,11 +55,10 @@ ENV TZ=UTC
|
|||||||
|
|
||||||
EXPOSE 11434
|
EXPOSE 11434
|
||||||
|
|
||||||
# Health check using Python (no curl needed in slim image)
|
# Health check
|
||||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:11434/')" || exit 1
|
||||||
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:11434/')" || exit 1
|
|
||||||
|
|
||||||
# Switch to non-root user
|
# Switch to non-root user
|
||||||
USER appuser
|
USER appuser
|
||||||
|
|
||||||
CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "11434"]"
|
ENTRYPOINT ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "11434"]
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ Every conversation is stored in Qdrant vector database and retrieved contextuall
|
|||||||
| Feature | Description |
|
| Feature | Description |
|
||||||
|---------|-------------|
|
|---------|-------------|
|
||||||
| **🧠 Persistent Memory** | Conversations stored in Qdrant, retrieved contextually |
|
| **🧠 Persistent Memory** | Conversations stored in Qdrant, retrieved contextually |
|
||||||
| **📅 Monthly Curation** | Daily + monthly cleanup of raw memories |
|
| **📅 Smart Curation** | Daily cleanup, auto-monthly on day 01 |
|
||||||
| **🔍 4-Layer Context** | System + semantic + recent + current messages |
|
| **🔍 4-Layer Context** | System + semantic + recent + current messages |
|
||||||
| **👤 Configurable UID/GID** | Match container user to host for permissions |
|
| **👤 Configurable UID/GID** | Match container user to host for permissions |
|
||||||
| **🌍 Timezone Support** | Scheduler runs in your local timezone |
|
| **🌍 Timezone Support** | Scheduler runs in your local timezone |
|
||||||
@@ -314,10 +314,8 @@ run_time = "02:00"
|
|||||||
|
|
||||||
# Time for monthly full curation (HH:MM format, 24-hour)
|
# Time for monthly full curation (HH:MM format, 24-hour)
|
||||||
# Processes ALL raw memories
|
# Processes ALL raw memories
|
||||||
full_run_time = "03:00"
|
|
||||||
|
|
||||||
# Day of month for full curation (1-28)
|
# Day of month for full curation (1-28)
|
||||||
full_run_day = 1
|
|
||||||
|
|
||||||
# Model to use for curation
|
# Model to use for curation
|
||||||
# Should be a capable model for summarization
|
# Should be a capable model for summarization
|
||||||
@@ -540,7 +538,8 @@ TZ=Europe/London # GMT/BST
|
|||||||
curl -X POST http://localhost:11434/curator/run
|
curl -X POST http://localhost:11434/curator/run
|
||||||
|
|
||||||
# Full curation (all raw memories)
|
# Full curation (all raw memories)
|
||||||
curl -X POST "http://localhost:11434/curator/run?full=true"
|
# Monthly mode is automatic when the curator runs on day 01 (UTC date); the ?full=true flag was removed
|
||||||
|
# curl -X POST http://localhost:11434/curator/run
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -48,8 +48,7 @@ class Config:
|
|||||||
semantic_search_turns: int = 2
|
semantic_search_turns: int = 2
|
||||||
semantic_score_threshold: float = 0.6 # Score threshold for semantic search
|
semantic_score_threshold: float = 0.6 # Score threshold for semantic search
|
||||||
run_time: str = "02:00" # Daily curator time
|
run_time: str = "02:00" # Daily curator time
|
||||||
full_run_time: str = "03:00" # Monthly full curator time
|
# Monthly mode has no config setting: Curator.run() switches to it when the UTC day of month is 1
|
||||||
full_run_day: int = 1 # Day of month for full run (1st)
|
|
||||||
curator_model: str = "gpt-oss:120b"
|
curator_model: str = "gpt-oss:120b"
|
||||||
debug: bool = False
|
debug: bool = False
|
||||||
cloud: CloudConfig = field(default_factory=CloudConfig)
|
cloud: CloudConfig = field(default_factory=CloudConfig)
|
||||||
@@ -103,8 +102,6 @@ class Config:
|
|||||||
|
|
||||||
if "curator" in data:
|
if "curator" in data:
|
||||||
config.run_time = data["curator"].get("run_time", config.run_time)
|
config.run_time = data["curator"].get("run_time", config.run_time)
|
||||||
config.full_run_time = data["curator"].get("full_run_time", config.full_run_time)
|
|
||||||
config.full_run_day = data["curator"].get("full_run_day", config.full_run_day)
|
|
||||||
config.curator_model = data["curator"].get("curator_model", config.curator_model)
|
config.curator_model = data["curator"].get("curator_model", config.curator_model)
|
||||||
|
|
||||||
if "cloud" in data:
|
if "cloud" in data:
|
||||||
@@ -118,4 +115,4 @@ class Config:
|
|||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
config = Config.load()
|
config = Config.load()
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
"""Memory curator - runs daily (recent 24h) and monthly (full DB) to clean and maintain memory database.
|
"""Memory curator - runs daily to clean and maintain memory database.
|
||||||
|
|
||||||
Creates INDIVIDUAL cleaned turns (one per raw turn), not merged summaries.
|
On day 01 of each month, processes ALL raw memories (monthly mode).
|
||||||
Parses JSON response from curator_prompt.md format.
|
Otherwise, processes recent 24h of raw memories (daily mode).
|
||||||
|
Curator.run() picks the mode from the current UTC date (day 1 = monthly) and substitutes that date into the prompt.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -23,7 +24,6 @@ STATIC_DIR = Path(os.environ.get("VERA_STATIC_DIR", "/app/static"))
|
|||||||
|
|
||||||
def load_curator_prompt() -> str:
|
def load_curator_prompt() -> str:
|
||||||
"""Load curator prompt from prompts directory."""
|
"""Load curator prompt from prompts directory."""
|
||||||
# Try prompts directory first, then static for backward compatibility
|
|
||||||
prompts_path = PROMPTS_DIR / "curator_prompt.md"
|
prompts_path = PROMPTS_DIR / "curator_prompt.md"
|
||||||
static_path = STATIC_DIR / "curator_prompt.md"
|
static_path = STATIC_DIR / "curator_prompt.md"
|
||||||
|
|
||||||
@@ -42,16 +42,20 @@ class Curator:
|
|||||||
self.ollama_host = ollama_host
|
self.ollama_host = ollama_host
|
||||||
self.curator_prompt = load_curator_prompt()
|
self.curator_prompt = load_curator_prompt()
|
||||||
|
|
||||||
async def run(self, full: bool = False):
|
async def run(self):
|
||||||
"""Run the curation process.
|
"""Run the curation process.
|
||||||
|
|
||||||
Args:
|
Automatically detects day 01 for monthly mode (processes ALL raw memories).
|
||||||
full: If True, process ALL raw memories (monthly full run).
|
Otherwise runs daily mode (processes recent 24h only).
|
||||||
If False, process only recent 24h (daily run).
|
The mode is derived here from the current UTC date, which is also substituted into the prompt.
|
||||||
"""
|
"""
|
||||||
logger.info(f"Starting memory curation (full={full})...")
|
current_date = datetime.utcnow()
|
||||||
|
is_monthly = current_date.day == 1
|
||||||
|
mode = "MONTHLY" if is_monthly else "DAILY"
|
||||||
|
|
||||||
|
logger.info(f"Starting memory curation ({mode} mode)...")
|
||||||
try:
|
try:
|
||||||
current_date = datetime.utcnow().strftime("%Y-%m-%d")
|
current_date_str = current_date.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
# Get all memories (async)
|
# Get all memories (async)
|
||||||
points, _ = await self.qdrant.client.scroll(
|
points, _ = await self.qdrant.client.scroll(
|
||||||
@@ -77,15 +81,15 @@ class Curator:
|
|||||||
|
|
||||||
logger.info(f"Found {len(raw_memories)} raw, {len(curated_memories)} curated")
|
logger.info(f"Found {len(raw_memories)} raw, {len(curated_memories)} curated")
|
||||||
|
|
||||||
# Filter by time for daily runs, process all for full runs
|
# Filter by time for daily mode, process all for monthly mode
|
||||||
if full:
|
if is_monthly:
|
||||||
# Monthly full run: process ALL raw memories
|
# Monthly full run: process ALL raw memories
|
||||||
recent_raw = raw_memories
|
recent_raw = raw_memories
|
||||||
logger.info(f"FULL RUN: Processing all {len(recent_raw)} raw memories")
|
logger.info(f"MONTHLY MODE: Processing all {len(recent_raw)} raw memories")
|
||||||
else:
|
else:
|
||||||
# Daily run: process only recent 24h
|
# Daily run: process only recent 24h
|
||||||
recent_raw = [m for m in raw_memories if self._is_recent(m, hours=24)]
|
recent_raw = [m for m in raw_memories if self._is_recent(m, hours=24)]
|
||||||
logger.info(f"DAILY RUN: Processing {len(recent_raw)} recent raw memories")
|
logger.info(f"DAILY MODE: Processing {len(recent_raw)} recent raw memories")
|
||||||
|
|
||||||
existing_sample = curated_memories[-50:] if len(curated_memories) > 50 else curated_memories
|
existing_sample = curated_memories[-50:] if len(curated_memories) > 50 else curated_memories
|
||||||
|
|
||||||
@@ -96,10 +100,10 @@ class Curator:
|
|||||||
raw_turns_text = self._format_raw_turns(recent_raw)
|
raw_turns_text = self._format_raw_turns(recent_raw)
|
||||||
existing_text = self._format_existing_memories(existing_sample)
|
existing_text = self._format_existing_memories(existing_sample)
|
||||||
|
|
||||||
prompt = self.curator_prompt.replace("{CURRENT_DATE}", current_date)
|
prompt = self.curator_prompt.replace("{CURRENT_DATE}", current_date_str)
|
||||||
full_prompt = f"""{prompt}
|
full_prompt = f"""{prompt}
|
||||||
|
|
||||||
## {'All' if full else 'Recent'} Raw Turns ({'full database' if full else 'last 24 hours'}):
|
## {'All' if is_monthly else 'Recent'} Raw Turns ({'full database' if is_monthly else 'last 24 hours'}):
|
||||||
{raw_turns_text}
|
{raw_turns_text}
|
||||||
|
|
||||||
## Existing Memories (sample):
|
## Existing Memories (sample):
|
||||||
@@ -152,20 +156,12 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
|
|||||||
await self.qdrant.delete_points(raw_ids_to_delete)
|
await self.qdrant.delete_points(raw_ids_to_delete)
|
||||||
logger.info(f"Deleted {len(raw_ids_to_delete)} processed raw memories")
|
logger.info(f"Deleted {len(raw_ids_to_delete)} processed raw memories")
|
||||||
|
|
||||||
logger.info(f"Memory curation completed successfully (full={full})")
|
logger.info(f"Memory curation completed successfully ({mode} mode)")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error during curation: {e}")
|
logger.error(f"Error during curation: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def run_full(self):
|
|
||||||
"""Run full curation (all raw memories). Convenience method."""
|
|
||||||
await self.run(full=True)
|
|
||||||
|
|
||||||
async def run_daily(self):
|
|
||||||
"""Run daily curation (recent 24h only). Convenience method."""
|
|
||||||
await self.run(full=False)
|
|
||||||
|
|
||||||
def _is_recent(self, memory: Dict, hours: int = 24) -> bool:
|
def _is_recent(self, memory: Dict, hours: int = 24) -> bool:
|
||||||
"""Check if memory is within the specified hours."""
|
"""Check if memory is within the specified hours."""
|
||||||
timestamp = memory.get("timestamp", "")
|
timestamp = memory.get("timestamp", "")
|
||||||
@@ -236,7 +232,9 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
|
|||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
json_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', response)
|
# Try to find JSON in code blocks
|
||||||
|
pattern = r'```(?:json)?\s*([\s\S]*?)```'
|
||||||
|
json_match = re.search(pattern, response)
|
||||||
if json_match:
|
if json_match:
|
||||||
try:
|
try:
|
||||||
return json.loads(json_match.group(1).strip())
|
return json.loads(json_match.group(1).strip())
|
||||||
@@ -248,7 +246,6 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J
|
|||||||
|
|
||||||
async def _append_rule_to_file(self, filename: str, rule: str):
|
async def _append_rule_to_file(self, filename: str, rule: str):
|
||||||
"""Append a permanent rule to a prompts file."""
|
"""Append a permanent rule to a prompts file."""
|
||||||
# Try prompts directory first, then static for backward compatibility
|
|
||||||
prompts_path = PROMPTS_DIR / filename
|
prompts_path = PROMPTS_DIR / filename
|
||||||
static_path = STATIC_DIR / filename
|
static_path = STATIC_DIR / filename
|
||||||
|
|
||||||
|
|||||||
56
app/main.py
56
app/main.py
@@ -20,25 +20,19 @@ curator = None
|
|||||||
|
|
||||||
|
|
||||||
async def run_curator():
|
async def run_curator():
|
||||||
"""Scheduled daily curator job (recent 24h)."""
|
"""Scheduled daily curator job.
|
||||||
|
|
||||||
|
Runs every day at configured time. The curator itself detects
|
||||||
|
if it's day 01 (monthly mode) and processes all memories.
|
||||||
|
Otherwise processes recent 24h only.
|
||||||
|
"""
|
||||||
global curator
|
global curator
|
||||||
logger.info("Starting daily memory curation...")
|
logger.info("Starting memory curation...")
|
||||||
try:
|
try:
|
||||||
await curator.run_daily()
|
await curator.run()
|
||||||
logger.info("Daily memory curation completed successfully")
|
logger.info("Memory curation completed successfully")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Daily memory curation failed: {e}")
|
logger.error(f"Memory curation failed: {e}")
|
||||||
|
|
||||||
|
|
||||||
async def run_curator_full():
|
|
||||||
"""Scheduled monthly curator job (full database)."""
|
|
||||||
global curator
|
|
||||||
logger.info("Starting monthly full memory curation...")
|
|
||||||
try:
|
|
||||||
await curator.run_full()
|
|
||||||
logger.info("Monthly full memory curation completed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Monthly full memory curation failed: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
@@ -59,23 +53,12 @@ async def lifespan(app: FastAPI):
|
|||||||
ollama_host=config.ollama_host
|
ollama_host=config.ollama_host
|
||||||
)
|
)
|
||||||
|
|
||||||
# Schedule daily curator (recent 24h)
|
# Schedule daily curator
|
||||||
|
# Note: Monthly mode is detected automatically inside curator.run() (UTC day of month == 1)
|
||||||
hour, minute = map(int, config.run_time.split(":"))
|
hour, minute = map(int, config.run_time.split(":"))
|
||||||
scheduler.add_job(run_curator, "cron", hour=hour, minute=minute, id="daily_curator")
|
scheduler.add_job(run_curator, "cron", hour=hour, minute=minute, id="daily_curator")
|
||||||
logger.info(f"Daily curator scheduled at {config.run_time}")
|
logger.info(f"Daily curator scheduled at {config.run_time}")
|
||||||
|
|
||||||
# Schedule monthly full curator (all raw memories)
|
|
||||||
full_hour, full_minute = map(int, config.full_run_time.split(":"))
|
|
||||||
scheduler.add_job(
|
|
||||||
run_curator_full,
|
|
||||||
"cron",
|
|
||||||
day=config.full_run_day,
|
|
||||||
hour=full_hour,
|
|
||||||
minute=full_minute,
|
|
||||||
id="monthly_curator"
|
|
||||||
)
|
|
||||||
logger.info(f"Monthly full curator scheduled on day {config.full_run_day} at {config.full_run_time}")
|
|
||||||
|
|
||||||
scheduler.start()
|
scheduler.start()
|
||||||
|
|
||||||
yield
|
yield
|
||||||
@@ -141,16 +124,11 @@ async def proxy_all(request: Request, path: str):
|
|||||||
|
|
||||||
|
|
||||||
@app.post("/curator/run")
|
@app.post("/curator/run")
|
||||||
async def trigger_curator(full: bool = False):
|
async def trigger_curator():
|
||||||
"""Manually trigger curator.
|
"""Manually trigger curator.
|
||||||
|
|
||||||
Args:
|
The curator will automatically detect if it's day 01 (monthly mode)
|
||||||
full: If True, run full curation (all raw memories).
|
and process all memories. Otherwise processes recent 24h.
|
||||||
If False (default), run daily curation (recent 24h).
|
|
||||||
"""
|
"""
|
||||||
if full:
|
await run_curator()
|
||||||
await run_curator_full()
|
return {"status": "curation completed"}
|
||||||
return {"status": "full curation completed"}
|
|
||||||
else:
|
|
||||||
await run_curator()
|
|
||||||
return {"status": "daily curation completed"}
|
|
||||||
|
|||||||
136
app/utils.py
136
app/utils.py
@@ -2,7 +2,7 @@
|
|||||||
from .config import config
|
from .config import config
|
||||||
import tiktoken
|
import tiktoken
|
||||||
import os
|
import os
|
||||||
from typing import List, Dict
|
from typing import List, Dict, Optional
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -127,10 +127,70 @@ def load_system_prompt() -> str:
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_curated_turn(text: str) -> List[Dict]:
|
||||||
|
"""Parse a curated turn into alternating user/assistant messages.
|
||||||
|
|
||||||
|
Input format:
|
||||||
|
User: [question]
|
||||||
|
Assistant: [answer]
|
||||||
|
Timestamp: ISO datetime
|
||||||
|
|
||||||
|
Returns list of message dicts with role and content.
|
||||||
|
Returns empty list if parsing fails.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return []
|
||||||
|
|
||||||
|
messages = []
|
||||||
|
lines = text.strip().split("\n")
|
||||||
|
|
||||||
|
current_role = None
|
||||||
|
current_content = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith("User:"):
|
||||||
|
# Save previous content if exists
|
||||||
|
if current_role and current_content:
|
||||||
|
messages.append({
|
||||||
|
"role": current_role,
|
||||||
|
"content": "\n".join(current_content).strip()
|
||||||
|
})
|
||||||
|
current_role = "user"
|
||||||
|
current_content = [line[5:].strip()] # Remove "User:" prefix
|
||||||
|
elif line.startswith("Assistant:"):
|
||||||
|
# Save previous content if exists
|
||||||
|
if current_role and current_content:
|
||||||
|
messages.append({
|
||||||
|
"role": current_role,
|
||||||
|
"content": "\n".join(current_content).strip()
|
||||||
|
})
|
||||||
|
current_role = "assistant"
|
||||||
|
current_content = [line[10:].strip()] # Remove "Assistant:" prefix
|
||||||
|
elif line.startswith("Timestamp:"):
|
||||||
|
# Ignore timestamp line
|
||||||
|
continue
|
||||||
|
elif current_role:
|
||||||
|
# Continuation of current message
|
||||||
|
current_content.append(line)
|
||||||
|
|
||||||
|
# Save last message
|
||||||
|
if current_role and current_content:
|
||||||
|
messages.append({
|
||||||
|
"role": current_role,
|
||||||
|
"content": "\n".join(current_content).strip()
|
||||||
|
})
|
||||||
|
|
||||||
|
return messages
|
||||||
|
|
||||||
|
|
||||||
async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
|
async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
|
||||||
"""Build 4-layer augmented messages from incoming messages.
|
"""Build 4-layer augmented messages from incoming messages.
|
||||||
|
|
||||||
This is a standalone version that can be used by proxy_handler.py.
|
Layer 1: System prompt (preserved from incoming + vera context)
|
||||||
|
Layer 2: Semantic memories (curated, parsed into proper roles)
|
||||||
|
Layer 3: Recent context (raw turns, parsed into proper roles)
|
||||||
|
Layer 4: Current conversation (passed through)
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
@@ -153,6 +213,10 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
|
|||||||
search_context += msg.get("content", "") + " "
|
search_context += msg.get("content", "") + " "
|
||||||
|
|
||||||
messages = []
|
messages = []
|
||||||
|
token_budget = {
|
||||||
|
"semantic": config.semantic_token_budget,
|
||||||
|
"context": config.context_token_budget
|
||||||
|
}
|
||||||
|
|
||||||
# === LAYER 1: System Prompt ===
|
# === LAYER 1: System Prompt ===
|
||||||
system_content = ""
|
system_content = ""
|
||||||
@@ -166,6 +230,7 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
|
|||||||
|
|
||||||
if system_content:
|
if system_content:
|
||||||
messages.append({"role": "system", "content": system_content})
|
messages.append({"role": "system", "content": system_content})
|
||||||
|
logger.info(f"Layer 1 (system): {count_tokens(system_content)} tokens")
|
||||||
|
|
||||||
# === LAYER 2: Semantic (curated memories) ===
|
# === LAYER 2: Semantic (curated memories) ===
|
||||||
qdrant = get_qdrant_service()
|
qdrant = get_qdrant_service()
|
||||||
@@ -176,28 +241,71 @@ async def build_augmented_messages(incoming_messages: List[Dict]) -> List[Dict]:
|
|||||||
entry_type="curated"
|
entry_type="curated"
|
||||||
)
|
)
|
||||||
|
|
||||||
semantic_tokens = 0
|
semantic_messages = []
|
||||||
|
semantic_tokens_used = 0
|
||||||
|
|
||||||
for result in semantic_results:
|
for result in semantic_results:
|
||||||
payload = result.get("payload", {})
|
payload = result.get("payload", {})
|
||||||
text = payload.get("text", "")
|
text = payload.get("text", "")
|
||||||
if text and semantic_tokens < config.semantic_token_budget:
|
if text:
|
||||||
messages.append({"role": "user", "content": text}) # Add as context
|
# Parse curated turn into proper user/assistant messages
|
||||||
semantic_tokens += count_tokens(text)
|
parsed = parse_curated_turn(text)
|
||||||
|
for msg in parsed:
|
||||||
|
msg_tokens = count_tokens(msg.get("content", ""))
|
||||||
|
if semantic_tokens_used + msg_tokens <= token_budget["semantic"]:
|
||||||
|
semantic_messages.append(msg)
|
||||||
|
semantic_tokens_used += msg_tokens
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
if semantic_tokens_used >= token_budget["semantic"]:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Add parsed messages to context
|
||||||
|
for msg in semantic_messages:
|
||||||
|
messages.append(msg)
|
||||||
|
|
||||||
|
if semantic_messages:
|
||||||
|
logger.info(f"Layer 2 (semantic): {len(semantic_messages)} messages, ~{semantic_tokens_used} tokens")
|
||||||
|
|
||||||
# === LAYER 3: Context (recent turns) ===
|
# === LAYER 3: Context (recent turns) ===
|
||||||
recent_turns = await qdrant.get_recent_turns(limit=20)
|
recent_turns = await qdrant.get_recent_turns(limit=50)
|
||||||
|
|
||||||
context_tokens = 0
|
context_messages = []
|
||||||
|
context_tokens_used = 0
|
||||||
|
|
||||||
|
# Process oldest first for chronological order
|
||||||
for turn in reversed(recent_turns):
|
for turn in reversed(recent_turns):
|
||||||
payload = turn.get("payload", {})
|
payload = turn.get("payload", {})
|
||||||
text = payload.get("text", "")
|
text = payload.get("text", "")
|
||||||
if text and context_tokens < config.context_token_budget:
|
entry_type = payload.get("type", "raw")
|
||||||
messages.append({"role": "user", "content": text}) # Add as context
|
|
||||||
context_tokens += count_tokens(text)
|
if text:
|
||||||
|
# Parse turn into messages
|
||||||
|
parsed = parse_curated_turn(text)
|
||||||
|
|
||||||
|
for msg in parsed:
|
||||||
|
msg_tokens = count_tokens(msg.get("content", ""))
|
||||||
|
if context_tokens_used + msg_tokens <= token_budget["context"]:
|
||||||
|
context_messages.append(msg)
|
||||||
|
context_tokens_used += msg_tokens
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
if context_tokens_used >= token_budget["context"]:
|
||||||
|
break
|
||||||
|
|
||||||
# === LAYER 4: Current messages (passed through) ===
|
# Add context messages (oldest first maintains conversation order)
|
||||||
|
for msg in context_messages:
|
||||||
|
messages.append(msg)
|
||||||
|
|
||||||
|
if context_messages:
|
||||||
|
logger.info(f"Layer 3 (context): {len(context_messages)} messages, ~{context_tokens_used} tokens")
|
||||||
|
|
||||||
|
# === LAYER 4: Current conversation ===
|
||||||
for msg in incoming_messages:
|
for msg in incoming_messages:
|
||||||
if msg.get("role") != "system": # Do not duplicate system
|
if msg.get("role") != "system": # System already handled in Layer 1
|
||||||
messages.append(msg)
|
messages.append(msg)
|
||||||
|
|
||||||
return messages
|
logger.info(f"Layer 4 (current): {len([m for m in incoming_messages if m.get('role') != 'system'])} messages")
|
||||||
|
|
||||||
|
return messages
|
||||||
|
|||||||
21
config.toml
21
config.toml
@@ -1,21 +0,0 @@
|
|||||||
[general]
|
|
||||||
ollama_host = "http://10.0.0.10:11434"
|
|
||||||
qdrant_host = "http://10.0.0.22:6333"
|
|
||||||
qdrant_collection = "memories"
|
|
||||||
embedding_model = "snowflake-arctic-embed2"
|
|
||||||
debug = false
|
|
||||||
|
|
||||||
[layers]
|
|
||||||
# Note: system_token_budget removed - system prompt is never truncated
|
|
||||||
semantic_token_budget = 25000
|
|
||||||
context_token_budget = 22000
|
|
||||||
semantic_search_turns = 2
|
|
||||||
semantic_score_threshold = 0.6
|
|
||||||
|
|
||||||
[curator]
|
|
||||||
# Daily curation: processes recent 24h of raw memories
|
|
||||||
run_time = "02:00"
|
|
||||||
# Monthly full curation: processes ALL raw memories
|
|
||||||
full_run_time = "03:00"
|
|
||||||
full_run_day = 1 # Day of month (1st)
|
|
||||||
curator_model = "gpt-oss:120b"
|
|
||||||
@@ -2,20 +2,15 @@
|
|||||||
ollama_host = "http://10.0.0.10:11434"
|
ollama_host = "http://10.0.0.10:11434"
|
||||||
qdrant_host = "http://10.0.0.22:6333"
|
qdrant_host = "http://10.0.0.22:6333"
|
||||||
qdrant_collection = "memories"
|
qdrant_collection = "memories"
|
||||||
embedding_model = "snowflake-arctic-embed2"
|
embedding_model = "mxbai-embed-large"
|
||||||
debug = false
|
debug = false
|
||||||
|
|
||||||
[layers]
|
[layers]
|
||||||
# Note: system_token_budget removed - system prompt is never truncated
|
|
||||||
semantic_token_budget = 25000
|
semantic_token_budget = 25000
|
||||||
context_token_budget = 22000
|
context_token_budget = 22000
|
||||||
semantic_search_turns = 2
|
semantic_search_turns = 2
|
||||||
semantic_score_threshold = 0.6
|
semantic_score_threshold = 0.3
|
||||||
|
|
||||||
[curator]
|
[curator]
|
||||||
# Daily curation: processes recent 24h of raw memories
|
|
||||||
run_time = "02:00"
|
run_time = "02:00"
|
||||||
# Monthly full curation: processes ALL raw memories
|
curator_model = "gpt-oss:120b"
|
||||||
full_run_time = "03:00"
|
|
||||||
full_run_day = 1 # Day of month (1st)
|
|
||||||
curator_model = "gpt-oss:120b"
|
|
||||||
|
|||||||
@@ -1,10 +1 @@
|
|||||||
You have persistent memory across all conversations with this user.
|
|
||||||
|
|
||||||
**Important:** The latter portion of your conversation context contains memories retrieved from a vector database. These are curated summaries of past conversations, not live chat history.
|
|
||||||
|
|
||||||
Use these memories to:
|
|
||||||
- Reference previous decisions and preferences
|
|
||||||
- Draw on relevant past discussions
|
|
||||||
- Provide personalized, context-aware responses
|
|
||||||
|
|
||||||
If memories seem outdated or conflicting, ask for clarification.
|
|
||||||
|
|||||||
Reference in New Issue
Block a user