UserSyncUI / tests /unit /test_profiling.py
harvesthealth's picture
Upload folder using huggingface_hub
f6686e1 verified
import pytest
import logging
import pandas as pd
import numpy as np
from collections import Counter
from unittest.mock import Mock, patch
logger = logging.getLogger("tinytroupe")
import sys
sys.path.insert(0, '../../tinytroupe/') # ensures that the package is imported from the parent directory
sys.path.insert(0, '../../')
sys.path.insert(0, '..')
from tinytroupe.profiling import Profiler
from tinytroupe.agent import TinyPerson
from tinytroupe.examples import create_oscar_the_architect, create_lisa_the_data_scientist
from testing_utils import *
class TestProfiler:
"""Test suite for the enhanced Profiler class."""
@pytest.fixture
def sample_agent_dicts(self):
"""Create sample agent data as dictionaries for testing."""
return [
{
"name": "Alice",
"age": 28,
"nationality": "American",
"occupation": {"title": "Software Engineer"},
"actions_count": 15,
"stimuli_count": 20,
"social_connections": 5,
"current_emotions": "Happy and motivated",
"current_goals": ["Complete project", "Learn new skills"],
"big_five": {
"openness": "High. Very creative and curious.",
"conscientiousness": "High. Well organized.",
"extraversion": "Medium. Socially comfortable.",
"agreeableness": "High. Very cooperative.",
"neuroticism": "Low. Emotionally stable."
}
},
{
"name": "Bob",
"age": 35,
"nationality": "Canadian",
"occupation": {"title": "Data Scientist"},
"actions_count": 22,
"stimuli_count": 18,
"social_connections": 3,
"current_emotions": "Focused and analytical",
"current_goals": ["Analyze data", "Present findings"],
"big_five": {
"openness": "High. Loves learning.",
"conscientiousness": "High. Very methodical.",
"extraversion": "Low. Prefers working alone.",
"agreeableness": "Medium. Collaborative when needed.",
"neuroticism": "Low. Calm under pressure."
}
},
{
"name": "Carol",
"age": 42,
"nationality": "British",
"occupation": {"title": "Marketing Manager"},
"actions_count": 30,
"stimuli_count": 25,
"social_connections": 8,
"current_emotions": "Enthusiastic and creative",
"current_goals": ["Launch campaign", "Increase brand awareness"],
"big_five": {
"openness": "High. Very imaginative.",
"conscientiousness": "Medium. Generally organized.",
"extraversion": "High. Very outgoing.",
"agreeableness": "High. Team player.",
"neuroticism": "Medium. Sometimes stressed."
}
}
]
@pytest.fixture
def profiler(self):
"""Create a basic profiler instance for testing."""
return Profiler(attributes=["age", "nationality", "occupation.title"])
@pytest.fixture
def advanced_profiler(self):
"""Create a profiler with more attributes for advanced testing."""
return Profiler(attributes=["age", "nationality", "occupation.title", "actions_count", "social_connections"])
def test_profiler_initialization(self):
"""Test that the profiler initializes correctly."""
profiler = Profiler()
assert profiler.attributes == ["age", "occupation.title", "nationality"]
assert profiler.attributes_distributions == {}
assert profiler.agents_data is None
assert profiler.analysis_results == {}
def test_profiler_custom_attributes(self):
"""Test profiler initialization with custom attributes."""
custom_attrs = ["name", "age", "occupation.title"]
profiler = Profiler(attributes=custom_attrs)
assert profiler.attributes == custom_attrs
def test_prepare_agent_data_with_dicts(self, profiler, sample_agent_dicts):
"""Test data preparation with dictionary agents."""
processed_data = profiler._prepare_agent_data(sample_agent_dicts)
assert len(processed_data) == 3
assert all(isinstance(agent, dict) for agent in processed_data)
assert processed_data[0]["name"] == "Alice"
assert processed_data[1]["age"] == 35
assert processed_data[2]["nationality"] == "British"
def test_get_nested_attribute(self, profiler):
"""Test nested attribute extraction."""
agent = {
"name": "Test",
"occupation": {"title": "Engineer", "company": "TechCorp"},
"skills": ["Python", "ML"]
}
# Test simple attribute
assert profiler._get_nested_attribute(agent, "name") == "Test"
# Test nested attribute
assert profiler._get_nested_attribute(agent, "occupation.title") == "Engineer"
assert profiler._get_nested_attribute(agent, "occupation.company") == "TechCorp"
# Test non-existent attribute
assert profiler._get_nested_attribute(agent, "nonexistent") is None
assert profiler._get_nested_attribute(agent, "occupation.nonexistent") is None
def test_compute_attribute_distribution(self, profiler, sample_agent_dicts):
"""Test attribute distribution computation."""
# Test age distribution
age_dist = profiler._compute_attribute_distribution(sample_agent_dicts, "age")
assert isinstance(age_dist, pd.Series)
assert len(age_dist) == 3 # 3 unique ages
assert age_dist[28] == 1
assert age_dist[35] == 1
assert age_dist[42] == 1
def test_compute_attribute_distribution_nested(self, profiler, sample_agent_dicts):
"""Test nested attribute distribution computation."""
# Test occupation.title distribution
occ_dist = profiler._compute_attribute_distribution(sample_agent_dicts, "occupation.title")
assert isinstance(occ_dist, pd.Series)
assert len(occ_dist) == 3 # 3 different occupations
assert "Software Engineer" in occ_dist.index
assert "Data Scientist" in occ_dist.index
assert "Marketing Manager" in occ_dist.index
def test_basic_profiling(self, profiler, sample_agent_dicts):
"""Test basic profiling functionality."""
# Mock matplotlib to avoid display issues in tests
with patch('matplotlib.pyplot.show'):
results = profiler.profile(sample_agent_dicts, plot=False, advanced_analysis=False)
assert "distributions" in results
assert "summary_stats" in results
# Check that distributions were computed
assert "age" in results["distributions"]
assert "nationality" in results["distributions"]
assert "occupation.title" in results["distributions"]
# Check summary stats
assert results["summary_stats"]["total_agents"] == 3
assert results["summary_stats"]["attributes_analyzed"] == 3
def test_advanced_profiling(self, advanced_profiler, sample_agent_dicts):
"""Test advanced profiling with statistical analysis."""
with patch('matplotlib.pyplot.show'):
results = advanced_profiler.profile(sample_agent_dicts, plot=False, advanced_analysis=True)
assert "analysis" in results
assert "demographics" in results["analysis"]
assert "behavioral_patterns" in results["analysis"]
assert "social_analysis" in results["analysis"]
assert "personality_clusters" in results["analysis"]
assert "correlations" in results["analysis"]
def test_demographics_analysis(self, profiler, sample_agent_dicts):
"""Test demographic analysis functionality."""
profiler.agents_data = sample_agent_dicts
demographics = profiler._analyze_demographics()
# Test age statistics
assert "age_stats" in demographics
age_stats = demographics["age_stats"]
assert age_stats["mean"] == pytest.approx((28 + 35 + 42) / 3, rel=1e-2)
assert age_stats["median"] == 35
assert age_stats["range"] == (28, 42)
# Test occupation diversity
assert "occupation_diversity" in demographics
occ_div = demographics["occupation_diversity"]
assert occ_div["unique_count"] == 3
assert len(occ_div["most_common"]) <= 5
def test_behavioral_analysis(self, profiler, sample_agent_dicts):
"""Test behavioral pattern analysis."""
profiler.agents_data = sample_agent_dicts
behavioral = profiler._analyze_behavioral_patterns()
# Test activity levels
assert "activity_levels" in behavioral
activity = behavioral["activity_levels"]
expected_actions_mean = (15 + 22 + 30) / 3
expected_stimuli_mean = (20 + 18 + 25) / 3
assert activity["actions_mean"] == pytest.approx(expected_actions_mean, rel=1e-2)
assert activity["stimuli_mean"] == pytest.approx(expected_stimuli_mean, rel=1e-2)
assert activity["activity_ratio"] > 0
def test_social_analysis(self, profiler, sample_agent_dicts):
"""Test social pattern analysis."""
profiler.agents_data = sample_agent_dicts
social = profiler._analyze_social_patterns()
# Test connectivity analysis
assert "connectivity" in social
connectivity = social["connectivity"]
expected_avg_connections = (5 + 3 + 8) / 3
assert connectivity["avg_connections"] == pytest.approx(expected_avg_connections, rel=1e-2)
assert connectivity["social_isolation_rate"] == 0 # No isolated agents in sample
def test_personality_analysis(self, profiler, sample_agent_dicts):
"""Test personality clustering analysis."""
profiler.agents_data = sample_agent_dicts
personality = profiler._analyze_personality_clusters()
# Should have trait analysis since we have Big Five data
assert "trait_analysis" in personality
trait_analysis = personality["trait_analysis"]
assert "average_traits" in trait_analysis
assert "trait_correlations" in trait_analysis
assert "dominant_traits" in trait_analysis
# Check that we have all Big Five traits
avg_traits = trait_analysis["average_traits"]
expected_traits = ["openness", "conscientiousness", "extraversion", "agreeableness", "neuroticism"]
for trait in expected_traits:
assert trait in avg_traits
def test_correlation_analysis(self, profiler, sample_agent_dicts):
"""Test correlation analysis."""
profiler.agents_data = sample_agent_dicts
correlations = profiler._analyze_correlations()
assert "correlation_matrix" in correlations
assert "numerical_correlations" in correlations
# Check that correlation matrix is properly formatted
corr_matrix = correlations["correlation_matrix"]
assert isinstance(corr_matrix, dict)
def test_diversity_index_calculation(self, profiler):
"""Test Shannon diversity index calculation."""
# Test with uniform distribution
uniform_counts = Counter(["A", "B", "C", "D"])
diversity = profiler._calculate_diversity_index(uniform_counts)
assert diversity == pytest.approx(1.0, rel=1e-2) # Maximum diversity
# Test with single item
single_counts = Counter(["A", "A", "A", "A"])
diversity = profiler._calculate_diversity_index(single_counts)
assert diversity == 0.0 # No diversity
# Test with empty counter
empty_counts = Counter()
diversity = profiler._calculate_diversity_index(empty_counts)
assert diversity == 0.0
def test_connectivity_categorization(self, profiler):
"""Test social connectivity categorization."""
connections = [0, 1, 2, 3, 5, 6, 8, 10]
categories = profiler._categorize_connectivity(connections)
assert categories["isolated"] == 1 # 0 connections
assert categories["low"] == 2 # 1, 2 connections
assert categories["medium"] == 2 # 3, 5 connections
assert categories["high"] == 3 # 6, 8, 10 connections
def test_normality_test(self, profiler):
"""Test normality testing function."""
# Test with normal-ish data
normal_data = [1, 2, 3, 4, 5, 4, 3, 2, 1]
result = profiler._test_normality(normal_data)
assert result == True
# Test with skewed data
skewed_data = [1, 1, 1, 1, 10, 10, 10]
result = profiler._test_normality(skewed_data)
assert result == False
# Test with insufficient data
small_data = [1, 2]
result = profiler._test_normality(small_data)
assert result == False
def test_dominant_traits_identification(self, profiler):
"""Test identification of dominant personality traits."""
# Create test data with clear dominant traits
traits_data = pd.DataFrame([
{"openness": 0.8, "conscientiousness": 0.3, "extraversion": 0.5},
{"openness": 0.9, "conscientiousness": 0.2, "extraversion": 0.6},
{"openness": 0.7, "conscientiousness": 0.4, "extraversion": 0.4}
])
dominant = profiler._identify_dominant_traits(traits_data)
assert dominant["openness"] == "high" # Mean ~0.8
assert dominant["conscientiousness"] == "low" # Mean ~0.3
assert dominant["extraversion"] == "moderate" # Mean ~0.5
def test_summary_statistics_generation(self, profiler, sample_agent_dicts):
"""Test summary statistics generation."""
profiler.agents_data = sample_agent_dicts
summary = profiler._generate_summary_statistics()
assert summary["total_agents"] == 3
assert summary["attributes_analyzed"] == 3
assert "data_completeness" in summary
# Check data completeness calculation
completeness = summary["data_completeness"]
assert completeness["age"] == 1.0 # All agents have age
assert completeness["nationality"] == 1.0 # All agents have nationality
assert completeness["occupation.title"] == 1.0 # All agents have occupation.title
def test_export_analysis_report(self, profiler, sample_agent_dicts, tmp_path):
"""Test analysis report export functionality."""
# Run analysis first
with patch('matplotlib.pyplot.show'):
profiler.profile(sample_agent_dicts, plot=False, advanced_analysis=True)
# Export report to temporary file
report_file = tmp_path / "test_report.txt"
profiler.export_analysis_report(str(report_file))
# Check that file was created and contains expected content
assert report_file.exists()
content = report_file.read_text()
assert "AGENT POPULATION ANALYSIS REPORT" in content
assert "Total Agents Analyzed: 3" in content
assert "DEMOGRAPHICS" in content
assert "BEHAVIORAL PATTERNS" in content
def test_add_custom_analysis(self, profiler, sample_agent_dicts):
"""Test adding custom analysis functions."""
def custom_analysis(agents_data):
return {"custom_metric": len(agents_data) * 2}
profiler.add_custom_analysis("test_analysis", custom_analysis)
assert hasattr(profiler, '_custom_analyses')
assert "test_analysis" in profiler._custom_analyses
assert profiler._custom_analyses["test_analysis"] == custom_analysis
def test_compare_populations(self, profiler, sample_agent_dicts):
"""Test population comparison functionality."""
# Create a second population
other_agents = [
{"name": "Dave", "age": 30, "nationality": "German", "occupation": {"title": "Designer"}},
{"name": "Eve", "age": 25, "nationality": "French", "occupation": {"title": "Writer"}}
]
# Run initial profiling
with patch('matplotlib.pyplot.show'):
profiler.profile(sample_agent_dicts, plot=False, advanced_analysis=False)
# Compare populations
with patch('matplotlib.pyplot.show'):
comparison = profiler.compare_populations(other_agents)
assert "population_sizes" in comparison
assert comparison["population_sizes"]["current"] == 3
assert comparison["population_sizes"]["comparison"] == 2
assert "attribute_comparisons" in comparison
# Should have comparisons for overlapping attributes
for attr in ["age", "nationality", "occupation.title"]:
if attr in comparison["attribute_comparisons"]:
attr_comp = comparison["attribute_comparisons"][attr]
assert "current_unique_values" in attr_comp
assert "comparison_unique_values" in attr_comp
@patch('matplotlib.pyplot.show')
def test_plotting_functions(self, mock_show, profiler, sample_agent_dicts):
"""Test that plotting functions run without errors."""
# Run profiling with plots enabled
results = profiler.profile(sample_agent_dicts, plot=True, advanced_analysis=True)
# Verify that matplotlib show was called (plots were generated)
assert mock_show.called
# Test individual plotting methods
profiler._plot_basic_distributions()
profiler._plot_advanced_analysis()
if 'demographics' in profiler.analysis_results:
profiler._plot_demographics()
if 'behavioral_patterns' in profiler.analysis_results:
profiler._plot_behavioral_patterns()
if ('correlations' in profiler.analysis_results and
'correlation_matrix' in profiler.analysis_results['correlations']):
profiler._plot_correlation_heatmap()
def test_error_handling_empty_data(self, profiler):
"""Test error handling with empty or invalid data."""
# Test with empty list
with patch('matplotlib.pyplot.show'):
results = profiler.profile([], plot=False, advanced_analysis=False)
assert results["summary_stats"]["total_agents"] == 0
# Test with agents missing attributes
incomplete_agents = [{"name": "Incomplete"}] # Missing most attributes
with patch('matplotlib.pyplot.show'):
results = profiler.profile(incomplete_agents, plot=False, advanced_analysis=False)
# Should handle gracefully
assert results["summary_stats"]["total_agents"] == 1
def test_mixed_data_types(self, profiler):
"""Test handling of mixed data types in attributes."""
mixed_agents = [
{"age": 25, "occupation": "Engineer"}, # occupation as string
{"age": 30, "occupation": {"title": "Scientist"}}, # occupation as dict
{"age": "35", "occupation": {"title": "Manager"}} # age as string
]
with patch('matplotlib.pyplot.show'):
results = profiler.profile(mixed_agents, plot=False, advanced_analysis=False)
# Should handle mixed types gracefully
assert results["summary_stats"]["total_agents"] == 3
@pytest.mark.skipif(TinyPerson is None, reason="TinyPerson not available")
def test_tinyperson_integration(self, profiler):
"""Test integration with actual TinyPerson objects."""
# Clear any existing agents to avoid name conflicts
TinyPerson.clear_agents()
# Create TinyPerson objects
oscar = create_oscar_the_architect()
lisa = create_lisa_the_data_scientist()
agents = [oscar, lisa]
with patch('matplotlib.pyplot.show'):
results = profiler.profile(agents, plot=False, advanced_analysis=True)
assert results["summary_stats"]["total_agents"] == 2
# Check that TinyPerson-specific data was extracted
agent_data = profiler.agents_data[0]
assert "name" in agent_data # Should have extracted name from persona
# Should have attempted to extract behavioral and social data
# (exact values depend on the specific TinyPerson state)
def test_large_population_performance(self, profiler):
"""Test profiler performance with larger populations."""
# Create a larger dataset
large_population = []
for i in range(100):
agent = {
"name": f"Agent_{i}",
"age": 20 + (i % 50),
"nationality": ["American", "Canadian", "British", "German", "French"][i % 5],
"occupation": {"title": ["Engineer", "Scientist", "Manager", "Designer", "Writer"][i % 5]},
"actions_count": i % 20,
"stimuli_count": (i + 5) % 25,
"social_connections": i % 10
}
large_population.append(agent)
with patch('matplotlib.pyplot.show'):
results = profiler.profile(large_population, plot=False, advanced_analysis=True)
assert results["summary_stats"]["total_agents"] == 100
# Check that analysis completed successfully
assert "demographics" in results["analysis"]
assert "behavioral_patterns" in results["analysis"]
if __name__ == "__main__":
pytest.main([__file__])