IntegraChat / backend /tests /test_analytics_store.py
nothingworry's picture
working Tenant ID
c509b44
raw
history blame
6.31 kB
"""
Tests for AnalyticsStore - tenant-level analytics logging
"""
import sys
from pathlib import Path
# Add backend directory to Python path
backend_dir = Path(__file__).parent.parent
sys.path.insert(0, str(backend_dir))
import pytest
import time
import tempfile
import os
from api.storage.analytics_store import AnalyticsStore
@pytest.fixture
def temp_analytics_db():
    """Provide the path of a throwaway ``.db`` file and remove it afterwards.

    Yields:
        str: Filesystem path of an empty temporary file ending in ``.db``.

    Teardown is best-effort: a file that cannot be removed is left behind
    rather than failing the test that used it.
    """
    # delete=False keeps the file on disk after the handle is closed, so only
    # the path is handed to the test and no handle of ours stays open.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.db') as tmp_file:
        db_path = tmp_file.name

    yield db_path

    # Cleanup: attempt the delete right away and only pay for a delay when
    # the first attempt actually fails.
    try:
        os.unlink(db_path)
    except FileNotFoundError:
        # Already gone; nothing left to clean up.
        return
    except OSError:
        # The file may still be held open (e.g. on Windows); give it a brief
        # moment to be released and retry once.
        time.sleep(0.1)
        try:
            os.unlink(db_path)
        except OSError:
            # Still in use - leaving a stray temp file behind is acceptable.
            pass
@pytest.fixture
def analytics_store(temp_analytics_db):
    """Build an AnalyticsStore that writes to the temporary database file."""
    store = AnalyticsStore(db_path=temp_analytics_db)
    return store
def test_analytics_store_init(analytics_store):
    """A freshly built store exists and its database file is on disk."""
    assert analytics_store is not None

    db_file = analytics_store.db_path
    assert db_file.exists()
def test_log_tool_usage(analytics_store):
    """Log a single tool call and check the aggregated stats for that tool."""
    event = {
        "tenant_id": "test_tenant",
        "tool_name": "rag",
        "latency_ms": 150,
        "tokens_used": 500,
        "success": True,
        "user_id": "user123",
    }
    analytics_store.log_tool_usage(**event)

    stats = analytics_store.get_tool_usage_stats("test_tenant")
    assert "rag" in stats

    # One logged event: the aggregates must equal that event's own values.
    rag_stats = stats["rag"]
    assert rag_stats["count"] == 1
    assert rag_stats["avg_latency_ms"] == 150.0
    assert rag_stats["total_tokens"] == 500
def test_log_redflag_violation(analytics_store):
    """Log one red-flag violation and read it back for the same tenant."""
    violation = {
        "tenant_id": "test_tenant",
        "rule_id": "rule123",
        "rule_pattern": ".*password.*",
        "severity": "high",
        "matched_text": "password123",
        "confidence": 0.95,
        "message_preview": "User entered password123",
        "user_id": "user123",
    }
    analytics_store.log_redflag_violation(**violation)

    violations = analytics_store.get_redflag_violations("test_tenant", limit=10)
    assert len(violations) == 1

    # The single stored record must echo the values that were logged.
    first = violations[0]
    assert first["severity"] == "high"
    assert first["confidence"] == 0.95
    assert first["matched_text"] == "password123"
def test_log_rag_search(analytics_store):
    """Log one RAG search and check the quality metrics derived from it."""
    search = {
        "tenant_id": "test_tenant",
        "query": "What is the policy?",
        "hits_count": 5,
        "avg_score": 0.85,
        "top_score": 0.92,
        "latency_ms": 120,
    }
    analytics_store.log_rag_search(**search)

    metrics = analytics_store.get_rag_quality_metrics("test_tenant")

    # With a single search, each average equals that search's own value.
    assert metrics["total_searches"] == 1
    assert metrics["avg_hits_per_search"] == 5.0
    assert metrics["avg_score"] == 0.85
    assert metrics["avg_top_score"] == 0.92
def test_log_agent_query(analytics_store):
    """Log one agent query and check it shows up in the activity summary."""
    query_event = {
        "tenant_id": "test_tenant",
        "message_preview": "What is the company policy?",
        "intent": "rag",
        "tools_used": ["rag", "llm"],
        "total_tokens": 1000,
        "total_latency_ms": 250,
        "success": True,
        "user_id": "user123",
    }
    analytics_store.log_agent_query(**query_event)

    summary = analytics_store.get_activity_summary("test_tenant")

    # One query from one user was logged.
    assert summary["total_queries"] == 1
    assert summary["active_users"] == 1
def test_tool_usage_stats_filtered_by_time(analytics_store):
    """Test that a just-logged event is reported with and without a time filter.

    NOTE(review): ``log_tool_usage`` is called without a timestamp here, so no
    backdated event is created and exclusion of older events by the filter is
    not exercised by this test.
    """
    analytics_store.log_tool_usage(
        tenant_id="test_tenant",
        tool_name="web",
        latency_ms=100
    )

    # Get stats without time filter
    all_stats = analytics_store.get_tool_usage_stats("test_tenant")
    assert "web" in all_stats

    # Get stats with a cutoff one hour in the past; the event logged above
    # must still be reported.
    # NOTE(review): the second argument is presumably a lower-bound Unix
    # timestamp in seconds - confirm against AnalyticsStore.
    recent_timestamp = int(time.time()) - 3600  # Last hour
    recent_stats = analytics_store.get_tool_usage_stats("test_tenant", recent_timestamp)
    assert "web" in recent_stats
def test_get_activity_summary(analytics_store):
    """Three queries from three distinct users are all counted in the summary."""
    expected_total = 3

    # Each iteration logs one query under its own user id.
    for n in range(expected_total):
        analytics_store.log_agent_query(
            tenant_id="test_tenant",
            message_preview=f"Query {n}",
            intent="general",
            tools_used=["llm"],
            user_id=f"user{n}",
        )

    summary = analytics_store.get_activity_summary("test_tenant")
    assert summary["total_queries"] == 3
    assert summary["active_users"] == 3
def test_get_rag_quality_metrics(analytics_store):
    """Several RAG searches are aggregated into positive quality metrics."""
    # Log three searches whose hit counts, scores and latencies grow with n.
    for n in range(3):
        search = {
            "tenant_id": "test_tenant",
            "query": f"Query {n}",
            "hits_count": 5 + n,
            "avg_score": 0.8 + n * 0.05,
            "top_score": 0.9 + n * 0.05,
            "latency_ms": 100 + n * 10,
        }
        analytics_store.log_rag_search(**search)

    quality = analytics_store.get_rag_quality_metrics("test_tenant")
    assert quality["total_searches"] == 3
    assert quality["avg_hits_per_search"] > 0
    assert quality["avg_score"] > 0
def test_multiple_tenants_isolation(analytics_store):
    """Tool usage logged for one tenant never appears in another tenant's stats."""
    # tenant1 only ever uses "rag"
    analytics_store.log_tool_usage(tenant_id="tenant1", tool_name="rag", latency_ms=100)
    # tenant2 only ever uses "web"
    analytics_store.log_tool_usage(tenant_id="tenant2", tool_name="web", latency_ms=200)

    # tenant1 sees its own tool and nothing from tenant2
    stats_one = analytics_store.get_tool_usage_stats("tenant1")
    assert "rag" in stats_one
    assert "web" not in stats_one

    # tenant2 sees its own tool and nothing from tenant1
    stats_two = analytics_store.get_tool_usage_stats("tenant2")
    assert "web" in stats_two
    assert "rag" not in stats_two