# NOTE(review): stray VCS metadata (author / commit message / hash) was pasted
# at the top of this file, which made it invalid Python. Preserved here as a
# comment: frabbani — "Fix fact extraction - pass raw data for simple tools" — 8daa8bf
"""
Evaluation Framework for Pre-Visit Summary Agent
This package provides tools to evaluate the accuracy of the
pre-visit summary agent's data retrieval and reporting.
Modules:
test_generator: Generates test cases from Synthea database
expected_values: Computes ground truth values from database
evaluator: Compares agent facts vs expected values
metrics: Aggregates results and computes summary statistics
facts_schema: Defines structured output format for agent
run_evaluation: Main entry point for running evaluations
Usage:
# Run direct evaluation (validates framework)
python -m evaluation.run_evaluation --mode direct --patients 10
# Run simulated evaluation (tests error detection)
python -m evaluation.run_evaluation --mode simulated --error-rate 0.15
"""
from .test_generator import generate_all_test_cases, get_test_summary
from .expected_values import compute_expected_values
from .evaluator import evaluate_case, CaseEvaluation
from .metrics import aggregate_metrics, format_report, EvaluationMetrics
__all__ = [
"generate_all_test_cases",
"get_test_summary",
"compute_expected_values",
"evaluate_case",
"CaseEvaluation",
"aggregate_metrics",
"format_report",
"EvaluationMetrics"
]