# felix-framework/tests/unit/test_knowledge_store.py
# jkbennitt
# Clean hf-space branch and prepare for HuggingFace Spaces deployment
# fb867c3
"""
Unit tests for Knowledge Store System.
Tests the KnowledgeStore, KnowledgeEntry, KnowledgeQuery classes and all
persistence, retrieval, and management functionality.
"""
import pytest
import json
import time
import sqlite3
import tempfile
import os
from pathlib import Path
from unittest.mock import patch, MagicMock
from src.memory.knowledge_store import (
KnowledgeStore, KnowledgeEntry, KnowledgeQuery,
KnowledgeType, ConfidenceLevel
)
class TestKnowledgeEntry:
    """Checks for the KnowledgeEntry data class: construction, defaults, dict round-trip."""

    def test_knowledge_entry_creation(self):
        """Explicitly supplied fields are exposed unchanged; counters and timestamps are initialised."""
        payload = {"task": "blog_writing", "result": "success"}

        created = KnowledgeEntry(
            knowledge_id="test_id_123",
            knowledge_type=KnowledgeType.TASK_RESULT,
            content=payload,
            confidence_level=ConfidenceLevel.HIGH,
            source_agent="test_agent",
            domain="writing",
            tags=["blog", "success"],
        )

        # Fields passed to the constructor.
        assert created.knowledge_id == "test_id_123"
        assert created.knowledge_type == KnowledgeType.TASK_RESULT
        assert created.content == payload
        assert created.confidence_level == ConfidenceLevel.HIGH
        assert created.source_agent == "test_agent"
        assert created.domain == "writing"
        assert created.tags == ["blog", "success"]

        # Fields the entry fills in on its own.
        assert created.access_count == 0
        assert created.success_rate == 1.0
        for stamp in (created.created_at, created.updated_at):
            assert isinstance(stamp, float)

    def test_knowledge_entry_defaults(self):
        """Omitted optional fields fall back to empty lists / neutral counters."""
        minimal = KnowledgeEntry(
            knowledge_id="test_123",
            knowledge_type=KnowledgeType.AGENT_INSIGHT,
            content={"insight": "useful pattern"},
            confidence_level=ConfidenceLevel.MEDIUM,
            source_agent="analyzer",
            domain="analysis",
        )

        assert minimal.tags == []
        assert minimal.access_count == 0
        assert minimal.success_rate == 1.0
        assert minimal.related_entries == []
        assert minimal.created_at > 0

    def test_to_dict_serialization(self):
        """to_dict() emits plain values, with both enums rendered as their string form."""
        source = KnowledgeEntry(
            knowledge_id="serialize_test",
            knowledge_type=KnowledgeType.OPTIMIZATION_DATA,
            content={"metric": "efficiency", "value": 0.85},
            confidence_level=ConfidenceLevel.VERIFIED,
            source_agent="optimizer",
            domain="performance",
            tags=["optimization", "metrics"],
            access_count=5,
            success_rate=0.9,
            related_entries=["related_123"],
        )

        serialized = source.to_dict()

        # Flat keys are compared against a table of expected values.
        expected_flat = {
            "knowledge_id": "serialize_test",
            "knowledge_type": "optimization_data",
            "confidence_level": "verified",
            "source_agent": "optimizer",
            "domain": "performance",
            "tags": ["optimization", "metrics"],
            "access_count": 5,
            "success_rate": 0.9,
            "related_entries": ["related_123"],
        }
        for key, wanted in expected_flat.items():
            assert serialized[key] == wanted

        # Nested content is only spot-checked.
        assert serialized["content"]["metric"] == "efficiency"

    def test_from_dict_deserialization(self):
        """from_dict() rebuilds an entry, mapping enum strings back to enum members."""
        raw = {
            "knowledge_id": "deserialize_test",
            "knowledge_type": "pattern_recognition",
            "content": {"pattern": "helix_convergence", "accuracy": 0.92},
            "confidence_level": "high",
            "source_agent": "pattern_detector",
            "domain": "geometry",
            "tags": ["patterns", "helix"],
            "created_at": 1640995200.0,
            "updated_at": 1640995300.0,
            "access_count": 3,
            "success_rate": 0.85,
            "related_entries": ["pattern_456"],
        }

        rebuilt = KnowledgeEntry.from_dict(raw)

        # Enum-valued fields come back as enum members, not strings.
        assert rebuilt.knowledge_type == KnowledgeType.PATTERN_RECOGNITION
        assert rebuilt.confidence_level == ConfidenceLevel.HIGH

        # Remaining checked fields are copied through verbatim.
        assert rebuilt.knowledge_id == "deserialize_test"
        assert rebuilt.content["pattern"] == "helix_convergence"
        assert rebuilt.source_agent == "pattern_detector"
        assert rebuilt.domain == "geometry"
        assert rebuilt.tags == ["patterns", "helix"]
        assert rebuilt.created_at == 1640995200.0
        assert rebuilt.access_count == 3
        assert rebuilt.success_rate == 0.85
class TestKnowledgeQuery:
    """Checks that KnowledgeQuery stores its filter configuration as given."""

    def test_knowledge_query_defaults(self):
        """A bare query has every filter unset and a limit of 10."""
        bare = KnowledgeQuery()

        unset_filters = (
            bare.knowledge_types,
            bare.domains,
            bare.tags,
            bare.min_confidence,
            bare.min_success_rate,
            bare.content_keywords,
            bare.time_range,
        )
        for value in unset_filters:
            assert value is None
        assert bare.limit == 10

    def test_knowledge_query_specific_filters(self):
        """Every filter passed to the constructor is readable back unchanged."""
        window = (1640995200.0, 1640995800.0)
        wanted_types = [KnowledgeType.TASK_RESULT, KnowledgeType.AGENT_INSIGHT]

        configured = KnowledgeQuery(
            knowledge_types=wanted_types,
            domains=["writing", "analysis"],
            tags=["blog", "research"],
            min_confidence=ConfidenceLevel.MEDIUM,
            min_success_rate=0.7,
            content_keywords=["optimization", "efficiency"],
            time_range=window,
            limit=25,
        )

        assert len(configured.knowledge_types) == 2
        assert KnowledgeType.TASK_RESULT in configured.knowledge_types
        assert configured.domains == ["writing", "analysis"]
        assert configured.tags == ["blog", "research"]
        assert configured.min_confidence == ConfidenceLevel.MEDIUM
        assert configured.min_success_rate == 0.7
        assert configured.content_keywords == ["optimization", "efficiency"]
        assert configured.time_range == (1640995200.0, 1640995800.0)
        assert configured.limit == 25
class TestKnowledgeStore:
    """Test KnowledgeStore functionality.

    Each test runs against a fresh temporary SQLite file.  Where a test needs
    to inspect what was persisted it reads the database directly through the
    ``_fetch_one`` / ``_fetch_all`` helpers, which always close their
    connection (``with sqlite3.connect(...)`` only manages the transaction and
    would leave the connection open).
    """

    @staticmethod
    def _fetch_one(db_path, sql, params=()):
        """Run ``sql`` against ``db_path`` and return the first row (or None).

        The connection is closed before returning so the temporary database
        file is not held open when the fixture removes it.
        """
        conn = sqlite3.connect(db_path)
        try:
            return conn.execute(sql, params).fetchone()
        finally:
            conn.close()

    @staticmethod
    def _fetch_all(db_path, sql, params=()):
        """Run ``sql`` against ``db_path`` and return all rows, closing the connection."""
        conn = sqlite3.connect(db_path)
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    @pytest.fixture
    def temp_db_path(self):
        """Create temporary database file for testing and remove it afterwards."""
        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as temp_file:
            temp_path = temp_file.name
        yield temp_path
        # Cleanup after test
        if os.path.exists(temp_path):
            os.unlink(temp_path)

    @pytest.fixture
    def knowledge_store(self, temp_db_path):
        """Create KnowledgeStore instance (compression disabled) for testing."""
        return KnowledgeStore(storage_path=temp_db_path, enable_compression=False)

    @pytest.fixture
    def compressed_store(self, temp_db_path):
        """Create KnowledgeStore with compression enabled."""
        return KnowledgeStore(storage_path=temp_db_path, enable_compression=True)

    def test_knowledge_store_initialization(self, temp_db_path):
        """Test KnowledgeStore initialization and database creation."""
        store = KnowledgeStore(storage_path=temp_db_path)
        assert store.storage_path == Path(temp_db_path)
        assert store.enable_compression is True  # Default
        assert os.path.exists(temp_db_path)

        # Verify database schema
        table_row = self._fetch_one(temp_db_path, """
            SELECT name FROM sqlite_master
            WHERE type='table' AND name='knowledge_entries'
        """)
        assert table_row is not None

    def test_database_initialization_with_indexes(self, temp_db_path):
        """Test that database indexes are created correctly."""
        # Constructed only for its side effect of creating the schema.
        KnowledgeStore(storage_path=temp_db_path)

        rows = self._fetch_all(temp_db_path, """
            SELECT name FROM sqlite_master
            WHERE type='index' AND tbl_name='knowledge_entries'
        """)
        indexes = [row[0] for row in rows]

        # Check that required indexes exist
        expected_indexes = [
            "idx_knowledge_type", "idx_domain",
            "idx_confidence", "idx_created_at",
        ]
        for index in expected_indexes:
            assert index in indexes

    def test_generate_knowledge_id(self, knowledge_store):
        """Test knowledge ID generation."""
        content1 = {"task": "writing", "result": "success"}
        content2 = {"task": "analysis", "result": "complete"}

        # Same content and agent should generate different IDs (due to timestamp)
        with patch('time.time', return_value=1640995200.0):
            id1 = knowledge_store._generate_knowledge_id(content1, "agent1")
        with patch('time.time', return_value=1640995300.0):
            id2 = knowledge_store._generate_knowledge_id(content1, "agent1")
        assert id1 != id2
        assert len(id1) == 16  # SHA256 truncated to 16 chars
        assert len(id2) == 16

        # Different content should generate different IDs
        id3 = knowledge_store._generate_knowledge_id(content2, "agent1")
        assert id3 != id1

    def test_compress_decompress_content(self, knowledge_store):
        """Test content compression and decompression."""
        content = {
            "large_data": "x" * 2000,  # Large content
            "nested": {"data": [1, 2, 3, 4, 5]},
            "metadata": {"timestamp": 1640995200.0},
        }

        compressed = knowledge_store._compress_content(content)
        assert isinstance(compressed, bytes)
        assert len(compressed) < len(json.dumps(content))  # Should be smaller

        decompressed = knowledge_store._decompress_content(compressed)
        assert decompressed == content

    def test_store_knowledge_basic(self, knowledge_store):
        """Test basic knowledge storage."""
        content = {"task": "blog_writing", "outcome": "successful"}
        knowledge_id = knowledge_store.store_knowledge(
            knowledge_type=KnowledgeType.TASK_RESULT,
            content=content,
            confidence_level=ConfidenceLevel.HIGH,
            source_agent="writer_agent",
            domain="writing",
            tags=["blog", "success"],
        )
        assert isinstance(knowledge_id, str)
        assert len(knowledge_id) == 16

        # Verify data is stored in database
        row = self._fetch_one(knowledge_store.storage_path, """
            SELECT knowledge_id, knowledge_type, content_json, source_agent, domain
            FROM knowledge_entries WHERE knowledge_id = ?
        """, (knowledge_id,))
        assert row is not None
        assert row[0] == knowledge_id
        assert row[1] == "task_result"
        assert json.loads(row[2]) == content
        assert row[3] == "writer_agent"
        assert row[4] == "writing"

    def test_store_knowledge_with_compression(self, compressed_store):
        """Test knowledge storage with compression for large content."""
        # Create large content that should trigger compression
        large_content = {
            "description": "Large content " + "x" * 2000,
            "data": list(range(100)),
            "metadata": {"type": "performance_test"},
        }
        knowledge_id = compressed_store.store_knowledge(
            knowledge_type=KnowledgeType.OPTIMIZATION_DATA,
            content=large_content,
            confidence_level=ConfidenceLevel.MEDIUM,
            source_agent="performance_agent",
            domain="optimization",
        )

        # Verify compressed storage
        row = self._fetch_one(compressed_store.storage_path, """
            SELECT content_json, content_compressed
            FROM knowledge_entries WHERE knowledge_id = ?
        """, (knowledge_id,))
        assert row is not None
        assert row[0] == ""  # JSON should be empty (compressed)
        assert row[1] is not None  # Compressed data should exist
        assert isinstance(row[1], bytes)

    def test_store_knowledge_without_tags(self, knowledge_store):
        """Test storing knowledge without tags."""
        content = {"insight": "geometric convergence improves efficiency"}
        knowledge_id = knowledge_store.store_knowledge(
            knowledge_type=KnowledgeType.AGENT_INSIGHT,
            content=content,
            confidence_level=ConfidenceLevel.VERIFIED,
            source_agent="geometry_agent",
            domain="mathematics",
            # No tags parameter
        )

        # Verify empty tags are stored
        row = self._fetch_one(knowledge_store.storage_path, """
            SELECT tags_json FROM knowledge_entries WHERE knowledge_id = ?
        """, (knowledge_id,))
        assert row is not None
        assert json.loads(row[0]) == []

    def test_retrieve_knowledge_basic(self, knowledge_store):
        """Test basic knowledge retrieval."""
        # Store some test knowledge
        content1 = {"task": "writing", "result": "success"}
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, content1, ConfidenceLevel.HIGH,
            "writer", "writing", ["blog"]
        )
        content2 = {"pattern": "helix_convergence", "accuracy": 0.9}
        knowledge_store.store_knowledge(
            KnowledgeType.PATTERN_RECOGNITION, content2, ConfidenceLevel.MEDIUM,
            "analyzer", "geometry", ["patterns"]
        )

        # Retrieve all knowledge
        query = KnowledgeQuery(limit=10)
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 2
        assert all(isinstance(entry, KnowledgeEntry) for entry in results)
        # Results should be ordered by confidence, success rate, updated_at DESC
        assert results[0].confidence_level == ConfidenceLevel.HIGH  # Higher confidence first

    def test_retrieve_knowledge_by_type(self, knowledge_store):
        """Test knowledge retrieval filtered by type."""
        # Store different types of knowledge
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "success"}, ConfidenceLevel.HIGH,
            "agent1", "domain1"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT, {"insight": "pattern"}, ConfidenceLevel.MEDIUM,
            "agent2", "domain2"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.OPTIMIZATION_DATA, {"metric": 0.8}, ConfidenceLevel.LOW,
            "agent3", "domain3"
        )

        # Query for specific types
        query = KnowledgeQuery(
            knowledge_types=[KnowledgeType.TASK_RESULT, KnowledgeType.AGENT_INSIGHT]
        )
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 2
        result_types = {entry.knowledge_type for entry in results}
        assert result_types == {KnowledgeType.TASK_RESULT, KnowledgeType.AGENT_INSIGHT}

    def test_retrieve_knowledge_by_domain(self, knowledge_store):
        """Test knowledge retrieval filtered by domain."""
        # Store knowledge in different domains
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "success"}, ConfidenceLevel.HIGH,
            "agent1", "writing"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT, {"insight": "pattern"}, ConfidenceLevel.MEDIUM,
            "agent2", "analysis"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.OPTIMIZATION_DATA, {"metric": 0.8}, ConfidenceLevel.HIGH,
            "agent3", "geometry"
        )

        # Query for specific domains
        query = KnowledgeQuery(domains=["writing", "geometry"])
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 2
        result_domains = {entry.domain for entry in results}
        assert result_domains == {"writing", "geometry"}

    def test_retrieve_knowledge_by_confidence(self, knowledge_store):
        """Test knowledge retrieval filtered by confidence level."""
        # Store knowledge with different confidence levels
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "low"}, ConfidenceLevel.LOW,
            "agent1", "domain1"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "medium"}, ConfidenceLevel.MEDIUM,
            "agent2", "domain2"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "high"}, ConfidenceLevel.HIGH,
            "agent3", "domain3"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "verified"}, ConfidenceLevel.VERIFIED,
            "agent4", "domain4"
        )

        # Query for medium confidence and above
        query = KnowledgeQuery(min_confidence=ConfidenceLevel.MEDIUM)
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 3  # MEDIUM, HIGH, VERIFIED
        confidence_levels = {entry.confidence_level for entry in results}
        assert ConfidenceLevel.LOW not in confidence_levels
        assert all(
            level in [ConfidenceLevel.MEDIUM, ConfidenceLevel.HIGH, ConfidenceLevel.VERIFIED]
            for level in confidence_levels
        )

    def test_retrieve_knowledge_by_success_rate(self, knowledge_store):
        """Test knowledge retrieval filtered by success rate."""
        # Store knowledge and update success rates
        id1 = knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "poor"}, ConfidenceLevel.HIGH,
            "agent1", "domain1"
        )
        id2 = knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "good"}, ConfidenceLevel.HIGH,
            "agent2", "domain2"
        )

        # Update success rates
        knowledge_store.update_success_rate(id1, 0.3)  # Low success
        knowledge_store.update_success_rate(id2, 0.8)  # High success

        # Query for high success rate only
        query = KnowledgeQuery(min_success_rate=0.7)
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 1
        assert results[0].success_rate >= 0.7
        assert results[0].content["result"] == "good"

    def test_retrieve_knowledge_by_time_range(self, knowledge_store):
        """Test knowledge retrieval filtered by time range."""
        # Store knowledge at different times
        with patch('time.time', return_value=1640995200.0):  # Jan 1, 2022
            knowledge_store.store_knowledge(
                KnowledgeType.TASK_RESULT, {"result": "old"}, ConfidenceLevel.HIGH,
                "agent1", "domain1"
            )
        with patch('time.time', return_value=1672531200.0):  # Jan 1, 2023
            knowledge_store.store_knowledge(
                KnowledgeType.TASK_RESULT, {"result": "new"}, ConfidenceLevel.HIGH,
                "agent2", "domain2"
            )

        # Query for entries from 2023 only
        query = KnowledgeQuery(time_range=(1672531200.0 - 1, 1672531200.0 + 1))
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 1
        assert results[0].content["result"] == "new"

    def test_retrieve_knowledge_by_content_keywords(self, knowledge_store):
        """Test knowledge retrieval filtered by content keywords."""
        # Store knowledge with different content
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT,
            {"description": "helix geometry optimization", "metric": 0.9},
            ConfidenceLevel.HIGH, "agent1", "geometry"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT,
            {"description": "blog writing improvements", "efficiency": 0.8},
            ConfidenceLevel.MEDIUM, "agent2", "writing"
        )

        # Query for content containing "helix"
        query = KnowledgeQuery(content_keywords=["helix"])
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 1
        assert "helix" in results[0].content["description"]

    def test_retrieve_knowledge_by_tags(self, knowledge_store):
        """Test knowledge retrieval filtered by tags."""
        # Store knowledge with different tags
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "success"}, ConfidenceLevel.HIGH,
            "agent1", "domain1", ["blog", "writing", "success"]
        )
        knowledge_store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT, {"insight": "pattern"}, ConfidenceLevel.MEDIUM,
            "agent2", "domain2", ["analysis", "patterns", "geometry"]
        )

        # Query for entries with "blog" tag
        query = KnowledgeQuery(tags=["blog"])
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 1
        assert "blog" in results[0].tags

    def test_retrieve_knowledge_with_limit(self, knowledge_store):
        """Test knowledge retrieval with result limit."""
        # Store multiple knowledge entries
        for i in range(5):
            knowledge_store.store_knowledge(
                KnowledgeType.TASK_RESULT, {"index": i}, ConfidenceLevel.HIGH,
                f"agent{i}", "domain"
            )

        # Query with limit
        query = KnowledgeQuery(limit=3)
        results = knowledge_store.retrieve_knowledge(query)
        assert len(results) == 3

    def test_retrieve_knowledge_access_count_update(self, knowledge_store):
        """Test that access count is incremented during retrieval."""
        # Store knowledge
        knowledge_id = knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "test"}, ConfidenceLevel.HIGH,
            "agent", "domain"
        )
        count_sql = """
            SELECT access_count FROM knowledge_entries WHERE knowledge_id = ?
        """

        # Initial access count should be 0
        before = self._fetch_one(knowledge_store.storage_path, count_sql, (knowledge_id,))
        assert before[0] == 0

        # Retrieve knowledge (should increment access count)
        knowledge_store.retrieve_knowledge(KnowledgeQuery())

        # Access count should be incremented
        after = self._fetch_one(knowledge_store.storage_path, count_sql, (knowledge_id,))
        assert after[0] == 1

    def test_update_success_rate(self, knowledge_store):
        """Test updating success rate for knowledge entry."""
        # Store knowledge
        knowledge_id = knowledge_store.store_knowledge(
            KnowledgeType.OPTIMIZATION_DATA, {"metric": "efficiency"},
            ConfidenceLevel.HIGH, "agent", "performance"
        )

        # Update success rate
        result = knowledge_store.update_success_rate(knowledge_id, 0.75)
        assert result is True

        # Verify update in database
        row = self._fetch_one(knowledge_store.storage_path, """
            SELECT success_rate, updated_at FROM knowledge_entries
            WHERE knowledge_id = ?
        """, (knowledge_id,))
        assert row[0] == 0.75
        assert row[1] > time.time() - 5  # Recently updated

    def test_update_success_rate_nonexistent(self, knowledge_store):
        """Test updating success rate for non-existent entry."""
        result = knowledge_store.update_success_rate("nonexistent_id", 0.5)
        assert result is False

    def test_add_related_entry(self, knowledge_store):
        """Test adding relationships between knowledge entries."""
        # Store two knowledge entries
        id1 = knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "primary"},
            ConfidenceLevel.HIGH, "agent1", "domain1"
        )
        id2 = knowledge_store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT, {"insight": "related"},
            ConfidenceLevel.MEDIUM, "agent2", "domain2"
        )

        # Add relationship
        result = knowledge_store.add_related_entry(id1, id2)
        assert result is True

        # Verify relationship in database
        row = self._fetch_one(knowledge_store.storage_path, """
            SELECT related_entries_json FROM knowledge_entries
            WHERE knowledge_id = ?
        """, (id1,))
        related_entries = json.loads(row[0])
        assert id2 in related_entries

    def test_add_related_entry_duplicate(self, knowledge_store):
        """Test adding duplicate relationship (should not duplicate)."""
        # Store knowledge entries and add relationship twice
        id1 = knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "primary"},
            ConfidenceLevel.HIGH, "agent1", "domain1"
        )
        id2 = knowledge_store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT, {"insight": "related"},
            ConfidenceLevel.MEDIUM, "agent2", "domain2"
        )

        # Add relationship twice
        knowledge_store.add_related_entry(id1, id2)
        knowledge_store.add_related_entry(id1, id2)

        # Verify only one relationship exists
        row = self._fetch_one(knowledge_store.storage_path, """
            SELECT related_entries_json FROM knowledge_entries
            WHERE knowledge_id = ?
        """, (id1,))
        related_entries = json.loads(row[0])
        assert related_entries.count(id2) == 1

    def test_add_related_entry_nonexistent(self, knowledge_store):
        """Test adding relationship to non-existent entry."""
        result = knowledge_store.add_related_entry("nonexistent_id", "some_id")
        assert result is False

    def test_get_knowledge_summary_empty(self, knowledge_store):
        """Test knowledge summary for empty store."""
        summary = knowledge_store.get_knowledge_summary()
        assert summary["total_entries"] == 0
        assert summary["by_type"] == {}
        assert summary["by_domain"] == {}
        assert summary["by_confidence"] == {}
        assert summary["average_success_rate"] == 0.0
        assert "storage_path" in summary

    def test_get_knowledge_summary_populated(self, knowledge_store):
        """Test knowledge summary for populated store."""
        # Store various types of knowledge
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result1": "success"},
            ConfidenceLevel.HIGH, "agent1", "writing"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result2": "success"},
            ConfidenceLevel.MEDIUM, "agent2", "writing"
        )
        knowledge_store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT, {"insight": "pattern"},
            ConfidenceLevel.HIGH, "agent3", "analysis"
        )

        summary = knowledge_store.get_knowledge_summary()
        assert summary["total_entries"] == 3
        assert summary["by_type"]["task_result"] == 2
        assert summary["by_type"]["agent_insight"] == 1
        assert summary["by_domain"]["writing"] == 2
        assert summary["by_domain"]["analysis"] == 1
        assert summary["by_confidence"]["high"] == 2
        assert summary["by_confidence"]["medium"] == 1
        assert summary["average_success_rate"] == 1.0  # All default to 1.0

    def test_cleanup_old_entries(self, knowledge_store):
        """Test cleanup of old and low-performing entries."""
        current_time = time.time()
        old_time = current_time - (31 * 24 * 3600)  # 31 days ago
        recent_time = current_time - (10 * 24 * 3600)  # 10 days ago

        # Store entries with different ages and success rates
        with patch('time.time', return_value=old_time):
            old_bad_id = knowledge_store.store_knowledge(
                KnowledgeType.TASK_RESULT, {"result": "old_bad"},
                ConfidenceLevel.LOW, "agent1", "domain1"
            )
            old_good_id = knowledge_store.store_knowledge(
                KnowledgeType.TASK_RESULT, {"result": "old_good"},
                ConfidenceLevel.HIGH, "agent2", "domain2"
            )
        with patch('time.time', return_value=recent_time):
            knowledge_store.store_knowledge(
                KnowledgeType.TASK_RESULT, {"result": "recent"},
                ConfidenceLevel.MEDIUM, "agent3", "domain3"
            )

        # Update success rates
        knowledge_store.update_success_rate(old_bad_id, 0.2)  # Low success
        knowledge_store.update_success_rate(old_good_id, 0.8)  # High success

        # NOTE(review): "old_good" is also 31 days old and never retrieved
        # before cleanup; confirm the store's unused-entry rule (see
        # test_cleanup_unused_entries) is not expected to remove it as well.

        # Run cleanup (max_age_days=30, min_success_rate=0.3)
        deleted_count = knowledge_store.cleanup_old_entries(
            max_age_days=30, min_success_rate=0.3
        )

        # Should delete old entry with low success rate
        assert deleted_count == 1

        # Verify remaining entries
        remaining = knowledge_store.retrieve_knowledge(KnowledgeQuery())
        remaining_results = {entry.content.get("result") for entry in remaining}
        assert "old_bad" not in remaining_results  # Should be deleted
        assert "old_good" in remaining_results  # Should remain (good success rate)
        assert "recent" in remaining_results  # Should remain (recent)

    def test_cleanup_unused_entries(self, knowledge_store):
        """Test cleanup of unused entries (zero access count)."""
        current_time = time.time()
        old_time = current_time - (31 * 24 * 3600)  # 31 days ago

        # Store old entry that's never been accessed
        with patch('time.time', return_value=old_time):
            knowledge_store.store_knowledge(
                KnowledgeType.TASK_RESULT, {"result": "unused"},
                ConfidenceLevel.HIGH, "agent1", "domain1"
            )

        # Store entry and access it
        knowledge_store.store_knowledge(
            KnowledgeType.TASK_RESULT, {"result": "accessed"},
            ConfidenceLevel.HIGH, "agent2", "domain2"
        )

        # Access ONLY the second entry.  Retrieval increments access_count for
        # every entry it returns, so an unfiltered query would also touch the
        # "unused" entry and defeat the precondition of this test.
        accessed = knowledge_store.retrieve_knowledge(KnowledgeQuery(domains=["domain2"]))
        assert [entry.content["result"] for entry in accessed] == ["accessed"]

        # Run cleanup - should delete unused old entry
        deleted_count = knowledge_store.cleanup_old_entries(max_age_days=30)
        assert deleted_count == 1  # Unused old entry deleted
class TestIntegrationScenarios:
    """End-to-end scenarios that combine storing, linking, rating and querying knowledge."""

    @pytest.fixture
    def temp_db_path(self):
        """Yield the path of a throwaway ``.db`` file and delete it once the test is done."""
        handle = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
        with handle:
            db_file = handle.name
        yield db_file
        if os.path.exists(db_file):
            os.unlink(db_file)

    def test_blog_writing_knowledge_lifecycle(self, temp_db_path):
        """Walk one blog-writing task through store -> link -> query -> rate -> summarise."""
        store = KnowledgeStore(storage_path=temp_db_path)

        # 1. Outcome of the task itself.
        task_content = {
            "task": "blog_writing",
            "topic": "AI ethics",
            "word_count": 1500,
            "quality_score": 0.85,
            "completion_time": 45.2,
            "agent_count": 5,
        }
        task_id = store.store_knowledge(
            knowledge_type=KnowledgeType.TASK_RESULT,
            content=task_content,
            confidence_level=ConfidenceLevel.HIGH,
            source_agent="blog_coordinator",
            domain="writing",
            tags=["blog", "AI", "ethics", "successful"],
        )

        # 2. Insight about what made the collaboration effective.
        insight_content = {
            "insight": "helix_convergence_pattern",
            "description": "Research agents spawning early improved topic coverage",
            "effectiveness": 0.92,
            "pattern_data": {
                "research_agents": 2,
                "analysis_agents": 2,
                "synthesis_agents": 1,
                "spawn_timing": [0.1, 0.3, 0.5, 0.7, 0.9],
            },
        }
        insight_id = store.store_knowledge(
            knowledge_type=KnowledgeType.AGENT_INSIGHT,
            content=insight_content,
            confidence_level=ConfidenceLevel.VERIFIED,
            source_agent="pattern_analyzer",
            domain="coordination",
            tags=["helix", "spawning", "collaboration", "patterns"],
        )

        # 3. Measured optimisation data.
        tuning_content = {
            "optimization_target": "completion_time",
            "baseline": 60.0,
            "optimized": 45.2,
            "improvement": 0.247,
            "technique": "dynamic_spawning",
            "parameters": {
                "confidence_threshold": 0.7,
                "max_agents": 5,
                "spawn_interval": 0.2,
            },
        }
        tuning_id = store.store_knowledge(
            knowledge_type=KnowledgeType.OPTIMIZATION_DATA,
            content=tuning_content,
            confidence_level=ConfidenceLevel.HIGH,
            source_agent="optimizer",
            domain="performance",
            tags=["optimization", "timing", "dynamic_spawning"],
        )

        # 4. Cross-link the three entries.
        links = (
            (task_id, insight_id),
            (task_id, tuning_id),
            (insight_id, tuning_id),
        )
        for origin, target in links:
            store.add_related_entry(origin, target)

        # 5. Look up comparable blog-writing tasks.
        blog_hits = store.retrieve_knowledge(
            KnowledgeQuery(
                domains=["writing"],
                tags=["blog"],
                min_confidence=ConfidenceLevel.MEDIUM,
            )
        )
        assert len(blog_hits) == 1
        blog_entry = blog_hits[0]
        assert blog_entry.content["task"] == "blog_writing"
        assert "AI ethics" in blog_entry.content["topic"]

        # 6. Look up optimisation knowledge by keyword.
        tuning_hits = store.retrieve_knowledge(
            KnowledgeQuery(
                knowledge_types=[KnowledgeType.OPTIMIZATION_DATA],
                content_keywords=["dynamic_spawning"],
            )
        )
        assert len(tuning_hits) == 1
        assert tuning_hits[0].content["improvement"] > 0.2

        # 7. Look up coordination patterns by tag.
        pattern_hits = store.retrieve_knowledge(
            KnowledgeQuery(
                knowledge_types=[KnowledgeType.AGENT_INSIGHT],
                tags=["helix", "patterns"],
            )
        )
        assert len(pattern_hits) == 1
        assert "helix_convergence_pattern" in pattern_hits[0].content["insight"]

        # 8. Feed back observed usefulness.
        ratings = (
            (task_id, 0.9),      # Very successful
            (insight_id, 0.95),  # Extremely valuable
            (tuning_id, 0.85),   # Good results
        )
        for entry_id, rate in ratings:
            store.update_success_rate(entry_id, rate)

        # 9. Aggregate view of the store.
        overview = store.get_knowledge_summary()
        assert overview["total_entries"] == 3
        per_domain = overview["by_domain"]
        assert per_domain["writing"] == 1
        assert per_domain["coordination"] == 1
        assert per_domain["performance"] == 1
        assert overview["average_success_rate"] > 0.85

    def test_cross_domain_knowledge_search(self, temp_db_path):
        """Entries in different domains that share a concept are found by tag, keyword and domain."""
        store = KnowledgeStore(storage_path=temp_db_path)

        # Mathematics domain: helix geometry.
        geometry_content = {
            "pattern": "geometric_convergence",
            "mathematical_model": "parametric_helix",
            "concentration_ratio": 4119,
            "precision": 1e-12,
        }
        store.store_knowledge(
            KnowledgeType.PATTERN_RECOGNITION,
            geometry_content,
            ConfidenceLevel.VERIFIED,
            "geometry_agent",
            "mathematics",
            ["helix", "geometry", "convergence", "precision"],
        )

        # Performance domain: efficiency gains.
        efficiency_content = {
            "optimization": "task_distribution",
            "architecture": "helix_spoke",
            "efficiency_gain": 0.441,
            "statistical_significance": 0.0441,
            "comparison": "vs_linear_pipeline",
        }
        store.store_knowledge(
            KnowledgeType.OPTIMIZATION_DATA,
            efficiency_content,
            ConfidenceLevel.HIGH,
            "performance_agent",
            "performance",
            ["helix", "efficiency", "task_distribution", "statistical"],
        )

        # Coordination domain: agent spawning.
        spawning_content = {
            "coordination_pattern": "temporal_spawning",
            "helix_position": "all_spawn_at_top",
            "attention_focusing": "natural_convergence",
            "spawn_timing": "different_times_same_geometry",
        }
        store.store_knowledge(
            KnowledgeType.AGENT_INSIGHT,
            spawning_content,
            ConfidenceLevel.HIGH,
            "coordinator_agent",
            "coordination",
            ["helix", "spawning", "attention", "coordination"],
        )

        # Tag search spans all three domains.
        helix_hits = store.retrieve_knowledge(
            KnowledgeQuery(tags=["helix"], min_confidence=ConfidenceLevel.MEDIUM)
        )
        assert len(helix_hits) == 3
        seen_domains = {hit.domain for hit in helix_hits}
        assert seen_domains == {"mathematics", "performance", "coordination"}

        # Keyword search with a confidence floor.
        precise_hits = store.retrieve_knowledge(
            KnowledgeQuery(
                content_keywords=["precision", "statistical"],
                min_confidence=ConfidenceLevel.HIGH,
            )
        )
        assert len(precise_hits) == 2  # Mathematics and performance entries

        # Domain + type search narrows to the coordination insight.
        coordination_hits = store.retrieve_knowledge(
            KnowledgeQuery(
                domains=["coordination"],
                knowledge_types=[KnowledgeType.AGENT_INSIGHT],
            )
        )
        assert len(coordination_hits) == 1
        assert "temporal_spawning" in coordination_hits[0].content["coordination_pattern"]

    def test_knowledge_evolution_over_time(self, temp_db_path):
        """A hypothesis, its experiment and the final conclusion are stored at different times and queried together."""
        store = KnowledgeStore(storage_path=temp_db_path)
        day_seconds = 24 * 3600

        # Low-confidence hypothesis, stamped 10 days ago.
        hypothesis_time = time.time() - (10 * day_seconds)
        hypothesis_content = {
            "hypothesis": "helix_better_than_mesh",
            "initial_evidence": "theoretical_analysis",
            "confidence_factors": ["geometric_elegance", "o_n_complexity"],
            "uncertainty": "lacks_empirical_validation",
        }
        with patch('time.time', return_value=hypothesis_time):
            hypothesis_id = store.store_knowledge(
                KnowledgeType.DOMAIN_EXPERTISE,
                hypothesis_content,
                ConfidenceLevel.LOW,
                "theorist_agent",
                "research",
                ["hypothesis", "helix", "mesh", "theory"],
            )

        # Medium-confidence experiment, stamped 5 days ago.
        experiment_time = time.time() - (5 * day_seconds)
        experiment_content = {
            "experiment": "helix_vs_mesh_performance",
            "metrics": {
                "task_distribution_efficiency": 0.441,
                "memory_efficiency": 0.75,
                "communication_overhead": "inconclusive",
            },
            "statistical_significance": 0.0441,
            "sample_size": 100,
        }
        with patch('time.time', return_value=experiment_time):
            experiment_id = store.store_knowledge(
                KnowledgeType.OPTIMIZATION_DATA,
                experiment_content,
                ConfidenceLevel.MEDIUM,
                "experimenter_agent",
                "validation",
                ["experiment", "helix", "mesh", "performance", "statistical"],
            )

        # Verified conclusion, stamped 1 day ago.
        conclusion_time = time.time() - (1 * day_seconds)
        conclusion_content = {
            "conclusion": "helix_advantages_validated",
            "validated_benefits": [
                "task_distribution_efficiency",
                "memory_efficiency",
                "natural_attention_focusing",
            ],
            "evidence_base": ["theoretical", "experimental", "statistical"],
            "publication_ready": True,
            "confidence_score": 0.95,
        }
        with patch('time.time', return_value=conclusion_time):
            conclusion_id = store.store_knowledge(
                KnowledgeType.DOMAIN_EXPERTISE,
                conclusion_content,
                ConfidenceLevel.VERIFIED,
                "validator_agent",
                "research",
                ["conclusion", "helix", "validated", "publication"],
            )

        # Link the three stages to one another.
        for origin, target in (
            (hypothesis_id, experiment_id),
            (experiment_id, conclusion_id),
            (hypothesis_id, conclusion_id),
        ):
            store.add_related_entry(origin, target)

        # Record how well each stage held up.
        for entry_id, rate in (
            (hypothesis_id, 0.8),    # Good starting point
            (experiment_id, 0.9),    # Solid experimental work
            (conclusion_id, 0.95),   # Validated conclusion
        ):
            store.update_success_rate(entry_id, rate)

        # All three stages fall inside the padded time window.
        full_window = (hypothesis_time - 3600, conclusion_time + 3600)
        timeline = store.retrieve_knowledge(
            KnowledgeQuery(
                domains=["research", "validation"],
                tags=["helix"],
                time_range=full_window,
            )
        )
        assert len(timeline) == 3

        # The most confident entry is expected to come first.
        ordered_levels = [stage.confidence_level for stage in timeline]
        assert ordered_levels[0] == ConfidenceLevel.VERIFIED

        # Only the conclusion is both high-confidence and matches the keywords.
        validated_hits = store.retrieve_knowledge(
            KnowledgeQuery(
                min_confidence=ConfidenceLevel.HIGH,
                content_keywords=["validated", "publication"],
            )
        )
        assert len(validated_hits) == 1
        final_entry = validated_hits[0]
        assert final_entry.content["publication_ready"] is True
        assert final_entry.confidence_level == ConfidenceLevel.VERIFIED