Files
vera-ai-v2/tests/test_utils.py
Vera-AI abfcc91eb3 v2.0.3: Improve error handling, add tests, cleanup
- Fix bare except clauses in curator.py and main.py
- Change embedding model to snowflake-arctic-embed2
- Increase semantic_score_threshold to 0.6
- Add memory context explanation to systemprompt.md
- Add pytest dependencies to requirements.txt
- Remove unused context_handler.py and .env.example
- Add project documentation (CLAUDE.md) and test files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-30 08:47:56 -05:00

85 lines
3.0 KiB
Python

"""Tests for utility functions."""
import pytest
from app.utils import count_tokens, truncate_by_tokens, parse_curated_turn
class TestCountTokens:
    """Behavioural checks for ``count_tokens``."""

    def test_empty_string(self):
        """An empty input yields a token count of zero."""
        token_total = count_tokens("")
        assert token_total == 0

    def test_simple_text(self):
        """A short, non-empty phrase yields a positive token count."""
        assert count_tokens("Hello, world!") > 0

    def test_longer_text(self):
        """A full sentence yields more tokens than a single word."""
        sentence = "Hello, this is a longer sentence with more words."
        word = "Hello"
        assert count_tokens(sentence) > count_tokens(word)
class TestTruncateByTokens:
    """Behavioural checks for ``truncate_by_tokens``."""

    def test_no_truncation_needed(self):
        """Input already within the token limit comes back unchanged."""
        original = "Short text"
        assert truncate_by_tokens(original, max_tokens=100) == original

    def test_truncation_applied(self):
        """Input over the token limit is cut down to fit within it."""
        limit = 5
        oversized = "This is a longer piece of text that will need to be truncated"
        shortened = truncate_by_tokens(oversized, max_tokens=limit)
        assert count_tokens(shortened) <= limit

    def test_empty_string(self):
        """An empty input comes back as an empty string."""
        outcome = truncate_by_tokens("", max_tokens=10)
        assert outcome == ""
class TestParseCuratedTurn:
    """Tests for parse_curated_turn function.

    Each test feeds a small "User: ... / Assistant: ..." transcript to
    ``parse_curated_turn`` and checks the resulting list of message dicts,
    which are read through their ``"role"`` and ``"content"`` keys.
    """

    def test_empty_string(self):
        """Empty string should return empty list."""
        assert parse_curated_turn("") == []

    def test_single_turn(self):
        """Single Q&A turn should parse into one user and one assistant message."""
        text = "User: What is Python?\nAssistant: A programming language."
        result = parse_curated_turn(text)
        assert len(result) == 2
        assert result[0]["role"] == "user"
        assert result[0]["content"] == "What is Python?"
        assert result[1]["role"] == "assistant"
        assert result[1]["content"] == "A programming language."

    def test_multiple_turns(self):
        """Multiple Q&A turns should parse correctly and keep their order."""
        text = (
            "User: What is Python?\n"
            "Assistant: A programming language.\n"
            "User: Is it popular?\n"
            "Assistant: Yes, very popular."
        )
        result = parse_curated_turn(text)
        assert len(result) == 4
        # A bare length check would pass even if roles were swapped or
        # mislabelled; pin the speaker order like test_single_turn does.
        roles = [msg["role"] for msg in result]
        assert roles == ["user", "assistant", "user", "assistant"]

    def test_timestamp_ignored(self):
        """Timestamp lines should be ignored."""
        text = "User: Question?\nAssistant: Answer.\nTimestamp: 2024-01-01T00:00:00Z"
        result = parse_curated_turn(text)
        assert len(result) == 2
        for msg in result:
            assert "Timestamp" not in msg["content"]

    def test_multiline_content(self):
        """Multiline content should be preserved."""
        text = "User: Line 1\nLine 2\nLine 3\nAssistant: Response"
        result = parse_curated_turn(text)
        # Guard the index below so a parser regression surfaces as a clear
        # assertion failure rather than an IndexError.
        assert result, "expected at least one parsed message"
        assert result[0]["role"] == "user"
        user_content = result[0]["content"]
        for expected_line in ("Line 1", "Line 2", "Line 3"):
            assert expected_line in user_content