""" Evaluation Framework for Pre-Visit Summary Agent This package provides tools to evaluate the accuracy of the pre-visit summary agent's data retrieval and reporting. Modules: test_generator: Generates test cases from Synthea database expected_values: Computes ground truth values from database evaluator: Compares agent facts vs expected values metrics: Aggregates results and computes summary statistics facts_schema: Defines structured output format for agent run_evaluation: Main entry point for running evaluations Usage: # Run direct evaluation (validates framework) python -m evaluation.run_evaluation --mode direct --patients 10 # Run simulated evaluation (tests error detection) python -m evaluation.run_evaluation --mode simulated --error-rate 0.15 """ from .test_generator import generate_all_test_cases, get_test_summary from .expected_values import compute_expected_values from .evaluator import evaluate_case, CaseEvaluation from .metrics import aggregate_metrics, format_report, EvaluationMetrics __all__ = [ "generate_all_test_cases", "get_test_summary", "compute_expected_values", "evaluate_case", "CaseEvaluation", "aggregate_metrics", "format_report", "EvaluationMetrics" ]