From abfcc91eb35860e79fe038d4e8344f0f257ba5c1 Mon Sep 17 00:00:00 2001 From: Vera-AI Date: Mon, 30 Mar 2026 08:47:56 -0500 Subject: [PATCH] v2.0.3: Improve error handling, add tests, cleanup - Fix bare except clauses in curator.py and main.py - Change embedding model to snowflake-arctic-embed2 - Increase semantic_score_threshold to 0.6 - Add memory context explanation to systemprompt.md - Add pytest dependencies to requirements.txt - Remove unused context_handler.py and .env.example - Add project documentation (CLAUDE.md) and test files Co-Authored-By: Claude Opus 4.6 --- .claude/skills/ssh/SKILL.md | 69 ++++++++++++ .env.example | 31 ------ CLAUDE.md | 128 ++++++++++++++++++++++ app/context_handler.py | 208 ------------------------------------ app/curator.py | 3 +- app/main.py | 3 +- config/config.toml | 4 +- prompts/systemprompt.md | 9 ++ requirements.txt | 2 + tests/__init__.py | 1 + tests/test_config.py | 42 ++++++++ tests/test_utils.py | 85 +++++++++++++++ 12 files changed, 342 insertions(+), 243 deletions(-) create mode 100644 .claude/skills/ssh/SKILL.md delete mode 100644 .env.example create mode 100644 CLAUDE.md delete mode 100644 app/context_handler.py create mode 100644 tests/__init__.py create mode 100644 tests/test_config.py create mode 100644 tests/test_utils.py diff --git a/.claude/skills/ssh/SKILL.md b/.claude/skills/ssh/SKILL.md new file mode 100644 index 0000000..4ae7adf --- /dev/null +++ b/.claude/skills/ssh/SKILL.md @@ -0,0 +1,69 @@ +--- +name: ssh +description: SSH into remote servers and execute commands. Use for remote operations, file transfers, and server management. +allowed-tools: Bash(ssh*), Bash(scp*), Bash(rsync*), Bash(sshpass*), Read, Write +argument-hint: [host-alias] +--- + +## SSH Connections + +| Alias | Host | User | Password | Hostname | Purpose | +|-------|------|------|----------|----------|---------| +| `deb9` | `10.0.0.48` | `n8n` | `passw0rd` | epyc-deb9 | vera-ai source project | +| `deb8` | `10.0.0.46` | `n8n` | `passw0rd` | epyc-deb8 | vera-ai Docker runtime | + +## Connection Commands + +**Interactive SSH:** +```bash +sshpass -p 'passw0rd' ssh -o StrictHostKeyChecking=no n8n@10.0.0.48 +sshpass -p 'passw0rd' ssh -o StrictHostKeyChecking=no n8n@10.0.0.46 +``` + +**Run single command:** +```bash +sshpass -p 'passw0rd' ssh -o StrictHostKeyChecking=no n8n@10.0.0.48 "command" +sshpass -p 'passw0rd' ssh -o StrictHostKeyChecking=no n8n@10.0.0.46 "command" +``` + +**Copy file to server:** +```bash +sshpass -p 'passw0rd' scp -o StrictHostKeyChecking=no local_file n8n@10.0.0.48:/remote/path +sshpass -p 'passw0rd' scp -o StrictHostKeyChecking=no local_file n8n@10.0.0.46:/remote/path +``` + +**Copy file from server:** +```bash +sshpass -p 'passw0rd' scp -o StrictHostKeyChecking=no n8n@10.0.0.48:/remote/path local_file +sshpass -p 'passw0rd' scp -o StrictHostKeyChecking=no n8n@10.0.0.46:/remote/path local_file +``` + +**Sync directory to server:** +```bash +sshpass -p 'passw0rd' rsync -avz -e "ssh -o StrictHostKeyChecking=no" local_dir/ n8n@10.0.0.48:/remote/path/ +sshpass -p 'passw0rd' rsync -avz -e "ssh -o StrictHostKeyChecking=no" local_dir/ n8n@10.0.0.46:/remote/path/ +``` + +**Sync directory from server:** +```bash +sshpass -p 'passw0rd' rsync -avz -e "ssh -o StrictHostKeyChecking=no" n8n@10.0.0.48:/remote/path/ local_dir/ +sshpass -p 'passw0rd' rsync -avz -e "ssh -o StrictHostKeyChecking=no" n8n@10.0.0.46:/remote/path/ local_dir/ +``` + +## Notes + +- Uses `sshpass` to handle password authentication non-interactively +- `-o StrictHostKeyChecking=no` prevents host key prompts (useful for automation) +- For frequent connections, consider setting up SSH key authentication instead of password + +## SSH Config (Optional) + +To simplify connections, add to `~/.ssh/config`: + +``` +Host n8n-server + HostName 10.0.0.48 + User n8n +``` + +Then connect with just `ssh n8n-server` (still needs password or key). \ No newline at end of file diff --git a/.env.example b/.env.example deleted file mode 100644 index d456fbf..0000000 --- a/.env.example +++ /dev/null @@ -1,31 +0,0 @@ -# Vera-AI Environment Configuration -# Copy this file to .env and customize for your deployment - -# ============================================================================= -# User/Group Configuration -# ============================================================================= -# UID and GID for the container user (must match host user for volume permissions) -# Run: id -u and id -g on your host to get these values -APP_UID=1000 -APP_GID=1000 - -# ============================================================================= -# Timezone Configuration -# ============================================================================= -# Timezone for the container (affects scheduler times) -# Common values: UTC, America/New_York, America/Chicago, America/Los_Angeles, Europe/London -TZ=America/Chicago - -# ============================================================================= -# API Keys (Optional) -# ============================================================================= -# OpenRouter API key for cloud model routing -# OPENROUTER_API_KEY=your_api_key_here - -# ============================================================================= -# Vera-AI Configuration Paths (Optional) -# ============================================================================= -# These can be overridden via environment variables -# VERA_CONFIG_DIR=/app/config -# VERA_PROMPTS_DIR=/app/prompts -# VERA_STATIC_DIR=/app/static \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6eccc56 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,128 @@ +# Vera-AI Project + +**Persistent Memory Proxy for Ollama** + +> **Status:** Built and running on deb8. Goal: Validate and improve. + +Vera-AI sits between AI clients and Ollama, storing conversations in Qdrant and retrieving context semantically — giving AI **true memory**. + +## Architecture + +``` +Client → Vera-AI (port 11434) → Ollama + ↓ + Qdrant (vector DB) + ↓ + Memory Storage +``` + +## Key Components + +| File | Purpose | +|------|---------| +| `app/main.py` | FastAPI application entry point | +| `app/proxy_handler.py` | Chat request handling | +| `app/qdrant_service.py` | Vector DB operations | +| `app/curator.py` | Memory curation (daily/monthly) | +| `app/config.py` | Configuration loader | +| `config/config.toml` | Main configuration file | + +## 4-Layer Context System + +1. **System Prompt** — From `prompts/systemprompt.md` +2. **Semantic Memory** — Curated Q&A from Qdrant (relevance search) +3. **Recent Context** — Last N conversation turns +4. **Current Messages** — User's current request + +## Configuration + +Key settings in `config/config.toml`: + +```toml +[general] +ollama_host = "http://10.0.0.10:11434" +qdrant_host = "http://10.0.0.22:6333" +qdrant_collection = "memories" +embedding_model = "snowflake-arctic-embed2" + +[layers] +semantic_token_budget = 25000 +context_token_budget = 22000 +semantic_search_turns = 2 +semantic_score_threshold = 0.6 + +[curator] +run_time = "02:00" # Daily curation time +curator_model = "gpt-oss:120b" +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `APP_UID` | `999` | Container user ID | +| `APP_GID` | `999` | Container group ID | +| `TZ` | `UTC` | Timezone | +| `VERA_DEBUG` | `false` | Enable debug logging | + +## Running + +```bash +# Build and start +docker compose build +docker compose up -d + +# Check status +docker ps +docker logs VeraAI --tail 20 + +# Health check +curl http://localhost:11434/ +``` + +## API Endpoints + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/` | GET | Health check | +| `/api/chat` | POST | Chat completion (with memory) | +| `/api/tags` | GET | List models | +| `/api/generate` | POST | Generate completion | +| `/curator/run` | POST | Trigger curation manually | + +## Development Workflow + +This project is synced with **deb9** (10.0.0.48). To sync changes: + +```bash +# Pull from deb9 +sshpass -p 'passw0rd' scp -r -o StrictHostKeyChecking=no n8n@10.0.0.48:/home/n8n/vera-ai/* /home/n8n/vera-ai/ + +# Push to deb9 (after local changes) +sshpass -p 'passw0rd' scp -r -o StrictHostKeyChecking=no /home/n8n/vera-ai/* n8n@10.0.0.48:/home/n8n/vera-ai/ +``` + +## Memory System + +- **raw** memories — Unprocessed conversation turns (until curation) +- **curated** memories — Cleaned Q&A pairs (permanent) +- **test** memories — Test entries (can be ignored) + +Curation runs daily at 02:00 and monthly on the 1st at 03:00. + +## Related Infrastructure + +| Service | Host | Port | +|---------|------|------| +| Qdrant | 10.0.0.22 | 6333 | +| Ollama | 10.0.0.10 | 11434 | +| deb9 | 10.0.0.48 | Source project (SSH) | +| deb8 | 10.0.0.46 | Docker runtime | + +## Qdrant Collections + +| Collection | Purpose | +|------------|---------| +| `python_kb` | Python code patterns reference for this project | +| `memories` | Conversation memory storage (default) | +| `vera_memories` | Alternative memory collection | \ No newline at end of file diff --git a/app/context_handler.py b/app/context_handler.py deleted file mode 100644 index 2c5aa62..0000000 --- a/app/context_handler.py +++ /dev/null @@ -1,208 +0,0 @@ -"""Context handler - builds 4-layer context for every request.""" -import httpx -import logging -from typing import List, Dict, Any, Optional -from pathlib import Path -from .config import Config -from .qdrant_service import QdrantService -from .utils import count_tokens, truncate_by_tokens - -logger = logging.getLogger(__name__) - - -class ContextHandler: - def __init__(self, config: Config): - self.config = config - self.qdrant = QdrantService( - host=config.qdrant_host, - collection=config.qdrant_collection, - embedding_model=config.embedding_model, - ollama_host=config.ollama_host - ) - self.system_prompt = self._load_system_prompt() - - def _load_system_prompt(self) -> str: - """Load system prompt from static/systemprompt.md.""" - try: - path = Path(__file__).parent.parent / "static" / "systemprompt.md" - return path.read_text().strip() - except FileNotFoundError: - logger.error("systemprompt.md not found - required file") - raise - - async def process(self, messages: List[Dict], model: str, stream: bool = False) -> Dict: - """Process chat request through 4-layer context.""" - # Get user question (last user message) - user_question = "" - for msg in reversed(messages): - if msg.get("role") == "user": - user_question = msg.get("content", "") - break - - # Get messages for semantic search (last N turns) - search_messages = [] - for msg in messages[-self.config.semantic_search_turns:]: - if msg.get("role") in ("user", "assistant"): - search_messages.append(msg.get("content", "")) - - # Build the 4-layer context messages - context_messages = await self.build_context_messages( - incoming_system=next((m for m in messages if m.get("role") == "system"), None), - user_question=user_question, - search_context=" ".join(search_messages) - ) - - # Forward to Ollama - async with httpx.AsyncClient(timeout=120.0) as client: - response = await client.post( - f"{self.config.ollama_host}/api/chat", - json={"model": model, "messages": context_messages, "stream": stream} - ) - result = response.json() - - # Store the Q&A turn in Qdrant - assistant_msg = result.get("message", {}).get("content", "") - await self.qdrant.store_qa_turn(user_question, assistant_msg) - - return result - - def _parse_curated_turn(self, text: str) -> List[Dict]: - """Parse a curated turn into alternating user/assistant messages. - - Input format: - User: [question] - Assistant: [answer] - Timestamp: ISO datetime - - Returns list of message dicts with role and content. - """ - messages = [] - lines = text.strip().split("\n") - - current_role = None - current_content = [] - - for line in lines: - line = line.strip() - if line.startswith("User:"): - # Save previous content if exists - if current_role and current_content: - messages.append({ - "role": current_role, - "content": "\n".join(current_content).strip() - }) - current_role = "user" - current_content = [line[5:].strip()] # Remove "User:" prefix - elif line.startswith("Assistant:"): - # Save previous content if exists - if current_role and current_content: - messages.append({ - "role": current_role, - "content": "\n".join(current_content).strip() - }) - current_role = "assistant" - current_content = [line[10:].strip()] # Remove "Assistant:" prefix - elif line.startswith("Timestamp:"): - # Ignore timestamp line - continue - elif current_role: - # Continuation of current message - current_content.append(line) - - # Save last message - if current_role and current_content: - messages.append({ - "role": current_role, - "content": "\n".join(current_content).strip() - }) - - return messages - - async def build_context_messages(self, incoming_system: Optional[Dict], user_question: str, search_context: str) -> List[Dict]: - """Build 4-layer context messages array.""" - messages = [] - token_budget = { - "semantic": self.config.semantic_token_budget, - "context": self.config.context_token_budget - } - - # === LAYER 1: System Prompt (pass through unchanged) === - # DO NOT truncate - preserve system prompt entirely - system_content = "" - if incoming_system: - system_content = incoming_system.get("content", "") - logger.info(f"System layer: preserved incoming system {len(system_content)} chars, {count_tokens(system_content)} tokens") - - # Add Vera context info if present (small, just metadata) - if self.system_prompt.strip(): - system_content += "\n\n" + self.system_prompt - logger.info(f"System layer: added vera context {len(self.system_prompt)} chars") - - messages.append({"role": "system", "content": system_content}) - - # === LAYER 2: Semantic Layer (curated memories) === - # Search for curated blocks only - semantic_results = await self.qdrant.semantic_search( - query=search_context if search_context else user_question, - limit=20, - score_threshold=self.config.semantic_score_threshold, - entry_type="curated" - ) - - # Parse curated turns into alternating user/assistant messages - semantic_messages = [] - semantic_tokens_used = 0 - - for result in semantic_results: - payload = result.get("payload", {}) - text = payload.get("text", "") - if text: - parsed = self._parse_curated_turn(text) - for msg in parsed: - msg_tokens = count_tokens(msg.get("content", "")) - if semantic_tokens_used + msg_tokens <= token_budget["semantic"]: - semantic_messages.append(msg) - semantic_tokens_used += msg_tokens - else: - break - - # Add parsed messages to context - for msg in semantic_messages: - messages.append(msg) - - if semantic_messages: - logger.info(f"Semantic layer: {len(semantic_messages)} messages, ~{semantic_tokens_used} tokens") - - # === LAYER 3: Context Layer (recent turns) === - recent_turns = await self.qdrant.get_recent_turns(limit=50) - - context_messages_parsed = [] - context_tokens_used = 0 - - for turn in reversed(recent_turns): # Oldest first - payload = turn.get("payload", {}) - text = payload.get("text", "") - entry_type = payload.get("type", "raw") - - if text: - # Parse turn into messages - parsed = self._parse_curated_turn(text) - - for msg in parsed: - msg_tokens = count_tokens(msg.get("content", "")) - if context_tokens_used + msg_tokens <= token_budget["context"]: - context_messages_parsed.append(msg) - context_tokens_used += msg_tokens - else: - break - - for msg in context_messages_parsed: - messages.append(msg) - - if context_messages_parsed: - logger.info(f"Context layer: {len(context_messages_parsed)} messages, ~{context_tokens_used} tokens") - - # === LAYER 4: Current Question === - messages.append({"role": "user", "content": user_question}) - - return messages \ No newline at end of file diff --git a/app/curator.py b/app/curator.py index 161b55d..f6685ea 100644 --- a/app/curator.py +++ b/app/curator.py @@ -171,7 +171,8 @@ Remember: Respond with ONLY valid JSON. No markdown, no explanations, just the J mem_time = datetime.fromisoformat(timestamp.replace("Z", "+00:00")) cutoff = datetime.utcnow() - timedelta(hours=hours) return mem_time.replace(tzinfo=None) > cutoff - except: + except (ValueError, TypeError): + logger.debug(f"Could not parse timestamp: {timestamp}") return True def _format_raw_turns(self, turns: List[Dict]) -> str: diff --git a/app/main.py b/app/main.py index 9ee03ae..9a7453d 100644 --- a/app/main.py +++ b/app/main.py @@ -80,7 +80,8 @@ async def health_check(): resp = await client.get(f"{config.ollama_host}/api/tags") if resp.status_code == 200: ollama_status = "reachable" - except: pass + except Exception: + logger.warning(f"Failed to reach Ollama at {config.ollama_host}") return {"status": "ok", "ollama": ollama_status} diff --git a/config/config.toml b/config/config.toml index 94cfe44..fb58639 100644 --- a/config/config.toml +++ b/config/config.toml @@ -2,14 +2,14 @@ ollama_host = "http://10.0.0.10:11434" qdrant_host = "http://10.0.0.22:6333" qdrant_collection = "memories" -embedding_model = "mxbai-embed-large" +embedding_model = "snowflake-arctic-embed2" debug = false [layers] semantic_token_budget = 25000 context_token_budget = 22000 semantic_search_turns = 2 -semantic_score_threshold = 0.3 +semantic_score_threshold = 0.6 [curator] run_time = "02:00" diff --git a/prompts/systemprompt.md b/prompts/systemprompt.md index 8b13789..27eff8a 100644 --- a/prompts/systemprompt.md +++ b/prompts/systemprompt.md @@ -1 +1,10 @@ +You have persistent memory across all conversations with this user. +**Important:** The latter portion of your conversation context contains memories retrieved from a vector database. These are curated summaries of past conversations, not live chat history. + +Use these memories to: +- Reference previous decisions and preferences +- Draw on relevant past discussions +- Provide personalized, context-aware responses + +If memories seem outdated or conflicting, ask for clarification. diff --git a/requirements.txt b/requirements.txt index 6b5d00e..767784d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ ollama>=0.1.0 toml>=0.10.2 tiktoken>=0.5.0 apscheduler>=3.10.0 +pytest>=7.0.0 +pytest-asyncio>=0.21.0 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..5f19b37 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Test package \ No newline at end of file diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..a633cb0 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,42 @@ +"""Tests for configuration.""" +import pytest +from pathlib import Path +from app.config import Config, EMBEDDING_DIMS + + +class TestConfig: + """Tests for Config class.""" + + def test_default_values(self): + """Config should have sensible defaults.""" + config = Config() + assert config.ollama_host == "http://10.0.0.10:11434" + assert config.qdrant_host == "http://10.0.0.22:6333" + assert config.qdrant_collection == "memories" + assert config.embedding_model == "snowflake-arctic-embed2" + + def test_vector_size_property(self): + """Vector size should match embedding model.""" + config = Config(embedding_model="snowflake-arctic-embed2") + assert config.vector_size == 1024 + + def test_vector_size_fallback(self): + """Unknown model should default to 1024.""" + config = Config(embedding_model="unknown-model") + assert config.vector_size == 1024 + + +class TestEmbeddingDims: + """Tests for embedding dimensions mapping.""" + + def test_snowflake_arctic_embed2(self): + """snowflake-arctic-embed2 should have 1024 dimensions.""" + assert EMBEDDING_DIMS["snowflake-arctic-embed2"] == 1024 + + def test_nomic_embed_text(self): + """nomic-embed-text should have 768 dimensions.""" + assert EMBEDDING_DIMS["nomic-embed-text"] == 768 + + def test_mxbai_embed_large(self): + """mxbai-embed-large should have 1024 dimensions.""" + assert EMBEDDING_DIMS["mxbai-embed-large"] == 1024 \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..129951f --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,85 @@ +"""Tests for utility functions.""" +import pytest +from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn + + +class TestCountTokens: + """Tests for count_tokens function.""" + + def test_empty_string(self): + """Empty string should return 0 tokens.""" + assert count_tokens("") == 0 + + def test_simple_text(self): + """Simple text should count tokens correctly.""" + text = "Hello, world!" + assert count_tokens(text) > 0 + + def test_longer_text(self): + """Longer text should have more tokens.""" + short = "Hello" + long = "Hello, this is a longer sentence with more words." + assert count_tokens(long) > count_tokens(short) + + +class TestTruncateByTokens: + """Tests for truncate_by_tokens function.""" + + def test_no_truncation_needed(self): + """Text shorter than limit should not be truncated.""" + text = "Short text" + result = truncate_by_tokens(text, max_tokens=100) + assert result == text + + def test_truncation_applied(self): + """Text longer than limit should be truncated.""" + text = "This is a longer piece of text that will need to be truncated" + result = truncate_by_tokens(text, max_tokens=5) + assert count_tokens(result) <= 5 + + def test_empty_string(self): + """Empty string should return empty string.""" + assert truncate_by_tokens("", max_tokens=10) == "" + + +class TestParseCuratedTurn: + """Tests for parse_curated_turn function.""" + + def test_empty_string(self): + """Empty string should return empty list.""" + assert parse_curated_turn("") == [] + + def test_single_turn(self): + """Single Q&A turn should parse correctly.""" + text = "User: What is Python?\nAssistant: A programming language." + result = parse_curated_turn(text) + assert len(result) == 2 + assert result[0]["role"] == "user" + assert result[0]["content"] == "What is Python?" + assert result[1]["role"] == "assistant" + assert result[1]["content"] == "A programming language." + + def test_multiple_turns(self): + """Multiple Q&A turns should parse correctly.""" + text = """User: What is Python? +Assistant: A programming language. +User: Is it popular? +Assistant: Yes, very popular.""" + result = parse_curated_turn(text) + assert len(result) == 4 + + def test_timestamp_ignored(self): + """Timestamp lines should be ignored.""" + text = "User: Question?\nAssistant: Answer.\nTimestamp: 2024-01-01T00:00:00Z" + result = parse_curated_turn(text) + assert len(result) == 2 + for msg in result: + assert "Timestamp" not in msg["content"] + + def test_multiline_content(self): + """Multiline content should be preserved.""" + text = "User: Line 1\nLine 2\nLine 3\nAssistant: Response" + result = parse_curated_turn(text) + assert "Line 1" in result[0]["content"] + assert "Line 2" in result[0]["content"] + assert "Line 3" in result[0]["content"] \ No newline at end of file