# hackathon_code4change/tests/test_gap_fixes.py
# Author: RoyAalekh — commit 6a28f91: refactored project structure, renamed scheduler dir to src
"""Test script to validate both gap fixes.
Tests:
1. Gap 1: RL training uses EDA-derived parameters
2. Gap 2: Ripeness feedback loop works
"""
from datetime import date, datetime
from rl.config import RLTrainingConfig
from rl.simple_agent import TabularQAgent
from rl.training import RLTrainingEnvironment, train_agent
from src.core.ripeness import RipenessClassifier, RipenessStatus
from src.data.case_generator import CaseGenerator
from src.data.param_loader import ParameterLoader
from src.monitoring.ripeness_calibrator import RipenessCalibrator
from src.monitoring.ripeness_metrics import RipenessMetrics
def test_gap1_eda_alignment():
    """Verify the RL training environment samples hearing outcomes from
    EDA-derived parameters exposed through a ParameterLoader."""
    print("\n" + "=" * 70)
    print("GAP 1: Testing EDA Alignment in RL Training")
    print("=" * 70)

    # Build a small synthetic caseload covering January 2024.
    case_gen = CaseGenerator(
        start=date(2024, 1, 1),
        end=date(2024, 1, 31),
        seed=42,
    )
    caseload = case_gen.generate(100, stage_mix_auto=True)

    # The environment is expected to wire in a ParameterLoader on construction.
    environment = RLTrainingEnvironment(
        cases=caseload,
        start_date=date(2024, 1, 1),
        horizon_days=30,
    )
    assert hasattr(environment, "param_loader"), "Environment should have param_loader"
    assert isinstance(environment.param_loader, ParameterLoader), (
        "param_loader should be ParameterLoader instance"
    )
    print("ParameterLoader successfully integrated into RLTrainingEnvironment")

    # Pin one case to a known stage/type so its EDA probability can be looked up.
    probe_case = caseload[0]
    probe_case.current_stage = "ADMISSION"
    probe_case.case_type = "RSA"
    p_adj_eda = environment.param_loader.get_adjournment_prob("ADMISSION", "RSA")
    print(f"EDA adjournment probability for ADMISSION/RSA: {p_adj_eda:.2%}")

    # Draw 100 simulated outcomes and compare the empirical rate to the EDA value.
    outcomes = [environment._simulate_hearing_outcome(probe_case) for _ in range(100)]
    adjourn_rate = sum(o == "ADJOURNED" for o in outcomes) / len(outcomes)
    print(f"Simulated adjournment rate: {adjourn_rate:.2%}")
    print(f" Difference from EDA: {abs(adjourn_rate - p_adj_eda):.2%}")

    # Sampling is stochastic: allow up to 15 percentage points of drift.
    assert abs(adjourn_rate - p_adj_eda) < 0.15, (
        f"Adjournment rate {adjourn_rate:.2%} too far from EDA {p_adj_eda:.2%}"
    )
    print("\n✅ GAP 1 FIXED: RL training now uses EDA-derived parameters\n")
def test_gap2_ripeness_feedback():
    """Test that ripeness feedback loop works.

    Records a known mix of correct and incorrect RIPE/UNRIPE predictions,
    then checks that RipenessMetrics surfaces the error rates, that
    RipenessCalibrator proposes adjustments, and that classifier
    thresholds can be updated and restored.
    """
    print("\n" + "=" * 70)
    print("GAP 2: Testing Ripeness Feedback Loop")
    print("=" * 70)
    # Create metrics tracker
    metrics = RipenessMetrics()
    # Simulate predictions and outcomes (need 50+ for calibrator)
    test_cases = []
    # Pattern: 50% false positives (RIPE but adjourned), 50% false negatives
    for i in range(50):
        if i % 4 == 0:
            test_cases.append((f"case{i}", RipenessStatus.RIPE, False))  # Correct RIPE
        elif i % 4 == 1:
            test_cases.append((f"case{i}", RipenessStatus.RIPE, True))  # False positive
        elif i % 4 == 2:
            test_cases.append(
                (f"case{i}", RipenessStatus.UNRIPE_SUMMONS, True)
            )  # Correct UNRIPE
        else:
            test_cases.append(
                (f"case{i}", RipenessStatus.UNRIPE_SUMMONS, False)
            )  # False negative
    prediction_date = datetime(2024, 1, 1)
    outcome_date = datetime(2024, 1, 2)
    # Feed every prediction and its observed outcome into the tracker.
    for case_id, predicted_status, was_adjourned in test_cases:
        metrics.record_prediction(case_id, predicted_status, prediction_date)
        actual_outcome = "ADJOURNED" if was_adjourned else "ARGUMENTS"
        metrics.record_outcome(case_id, actual_outcome, was_adjourned, outcome_date)
    print(f"Recorded {len(test_cases)} predictions and outcomes")
    # Get accuracy metrics
    accuracy = metrics.get_accuracy_metrics()
    print("\n Accuracy Metrics:")
    print(f" False positive rate: {accuracy['false_positive_rate']:.1%}")
    print(f" False negative rate: {accuracy['false_negative_rate']:.1%}")
    print(f" RIPE precision: {accuracy['ripe_precision']:.1%}")
    print(f" UNRIPE recall: {accuracy['unripe_recall']:.1%}")
    # Expected: 2/4 false positives (50%), 1/2 false negatives (50%)
    assert accuracy["false_positive_rate"] > 0.4, "Should detect false positives"
    assert accuracy["false_negative_rate"] > 0.4, "Should detect false negatives"
    print("\nRipenessMetrics successfully tracks classification accuracy")
    # Test calibrator
    adjustments = RipenessCalibrator.analyze_metrics(metrics)
    print(f"\nRipenessCalibrator generated {len(adjustments)} adjustment suggestions:")
    for adj in adjustments:
        # Explicit separator so current/suggested values can't run together
        # in the output (e.g. "3 → 5" instead of "35").
        print(
            f" - {adj.threshold_name}: {adj.current_value} → {adj.suggested_value}"
        )
        print(f" Reason: {adj.reason[:80]}...")
    assert len(adjustments) > 0, "Should suggest at least one adjustment"
    # Test threshold configuration
    original_thresholds = RipenessClassifier.get_current_thresholds()
    print(f"\nCurrent thresholds: {original_thresholds}")
    # Apply test adjustment; restore in `finally` so a failed assertion
    # cannot leak mutated class-level thresholds into later tests.
    test_thresholds = {"MIN_SERVICE_HEARINGS": 2}
    RipenessClassifier.set_thresholds(test_thresholds)
    try:
        new_thresholds = RipenessClassifier.get_current_thresholds()
        assert new_thresholds["MIN_SERVICE_HEARINGS"] == 2, "Threshold should be updated"
        print(f"Thresholds successfully updated: {new_thresholds}")
    finally:
        # Restore original
        RipenessClassifier.set_thresholds(original_thresholds)
    print("\n✅ GAP 2 FIXED: Ripeness feedback loop fully operational\n")
def test_end_to_end():
    """Smoke-test both gap fixes together via a tiny RL training run."""
    print("\n" + "=" * 70)
    print("END-TO-END: Testing Both Gaps Together")
    print("=" * 70)

    # Fresh agent with modest exploration for a quick run.
    q_agent = TabularQAgent(learning_rate=0.15, epsilon=0.4, discount=0.95)

    # Keep the run tiny: 2 episodes of 10 days over 50 cases.
    mini_config = RLTrainingConfig(
        episodes=2,
        episode_length_days=10,
        cases_per_episode=50,
        training_seed=42,
    )
    print("Running mini training (2 episodes, 50 cases, 10 days)...")
    stats = train_agent(q_agent, rl_config=mini_config, verbose=False)

    episode_ids = stats["episodes"]
    assert len(episode_ids) == 2, "Should complete 2 episodes"
    assert episode_ids[-1] == 1, "Last episode should be episode 1"
    print(f"Training completed: {len(stats['episodes'])} episodes")
    print(f" Final disposal rate: {stats['disposal_rates'][-1]:.1%}")
    print(f" States explored: {stats['states_explored'][-1]}")
    print("\n✅ END-TO-END: Both gaps working together successfully\n")
if __name__ == "__main__":
    # Manual runner: execute every gap test in order, then print a summary.
    print("\n" + "=" * 70)
    print("TESTING GAP FIXES")
    print("=" * 70)
    try:
        for check in (
            test_gap1_eda_alignment,
            test_gap2_ripeness_feedback,
            test_end_to_end,
        ):
            check()
        print("\n" + "=" * 70)
        print("ALL TESTS PASSED")
        print("=" * 70)
        print("\nSummary:")
        print(" ✅ Gap 1: RL training aligned with EDA parameters")
        print(" ✅ Gap 2: Ripeness feedback loop operational")
        print(" ✅ End-to-end: Both gaps working together")
        print("\nBoth confirmed gaps are now FIXED!")
        print("=" * 70 + "\n")
    except Exception as e:
        # Surface the failure message, then re-raise for a nonzero exit code.
        print(f"\nTEST FAILED: {e}")
        raise