"""
A/B Testing Framework
Compare v1 vs v2 predictor accuracy on historical data.
"""
import hashlib
import json
import logging
import random
from datetime import datetime
from pathlib import Path
from typing import Dict, List
logger = logging.getLogger(__name__)
# Data layout: <repo_root>/data holds inputs; ab_tests/ holds saved summaries.
DATA_DIR = Path(__file__).parent.parent / "data"
AB_RESULTS_DIR = DATA_DIR / "ab_tests"
# NOTE(review): directory creation happens at import time as a side effect —
# harmless if the parent is writable, but fails on import otherwise.
AB_RESULTS_DIR.mkdir(parents=True, exist_ok=True)
class ABTester:
    """A/B testing harness for comparing two prediction models.

    Tracks named tests, assigns users to variants, accumulates per-variant
    prediction outcomes, and reports accuracy with a simple significance
    heuristic.
    """

    # Minimum predictions per variant before a winner may be declared.
    MIN_SAMPLES = 100
    # Absolute accuracy gap required to declare a winner (5%).
    WIN_MARGIN = 0.05

    def __init__(self):
        # Maps test name -> mutable test record (variants, counters, metadata).
        self.tests: Dict[str, Dict] = {}
        self.current_variant = 'A'  # Default to v1

    def create_test(self, test_name: str, variant_a: str = 'v1', variant_b: str = 'v2') -> Dict:
        """Create (or reset) a named A/B test and return its record."""
        self.tests[test_name] = {
            'name': test_name,
            'variant_a': variant_a,
            'variant_b': variant_b,
            'results_a': {'predictions': 0, 'correct': 0},
            'results_b': {'predictions': 0, 'correct': 0},
            'created_at': datetime.now().isoformat(),
            'is_active': True
        }
        return self.tests[test_name]

    def get_variant(self, test_name: str, user_id: str = None) -> str:
        """Return 'A' or 'B' for a user; 'A' if the test is unknown.

        Fix: the original used built-in hash(user_id), which is randomized
        per process (PYTHONHASHSEED), so "consistent" assignment silently
        changed between runs. An MD5 digest gives a stable bucket.
        """
        if test_name not in self.tests:
            return 'A'
        if user_id:
            # Consistent assignment based on user ID (stable across runs).
            digest = hashlib.md5(user_id.encode('utf-8')).hexdigest()
            return 'A' if int(digest, 16) % 2 == 0 else 'B'
        # No user ID: random assignment.
        return random.choice(['A', 'B'])

    def record_prediction(self, test_name: str, variant: str, correct: bool):
        """Record one prediction outcome for a variant of a test.

        Unknown test names are ignored; any variant other than 'A' is
        counted under variant B (matching original behavior).
        """
        if test_name not in self.tests:
            return
        key = 'results_a' if variant == 'A' else 'results_b'
        bucket = self.tests[test_name][key]
        bucket['predictions'] += 1
        if correct:
            bucket['correct'] += 1

    def get_results(self, test_name: str) -> Dict:
        """Summarize a test: per-variant accuracy, winner, significance."""
        if test_name not in self.tests:
            return {'error': 'Test not found'}
        test = self.tests[test_name]
        a_preds = test['results_a']['predictions']
        a_correct = test['results_a']['correct']
        b_preds = test['results_b']['predictions']
        b_correct = test['results_b']['correct']
        # Guard against division by zero for variants with no data yet.
        a_acc = a_correct / a_preds if a_preds > 0 else 0
        b_acc = b_correct / b_preds if b_preds > 0 else 0
        # Simple significance heuristic — a real test would use e.g. a
        # two-proportion z-test; this needs more data in practice.
        winner = None
        if a_preds >= self.MIN_SAMPLES and b_preds >= self.MIN_SAMPLES:
            if abs(a_acc - b_acc) > self.WIN_MARGIN:
                winner = 'A' if a_acc > b_acc else 'B'
        return {
            'test_name': test_name,
            'variant_a': test['variant_a'],
            'variant_b': test['variant_b'],
            'results': {
                'A': {'predictions': a_preds, 'correct': a_correct, 'accuracy': round(a_acc, 4)},
                'B': {'predictions': b_preds, 'correct': b_correct, 'accuracy': round(b_acc, 4)}
            },
            'winner': winner,
            'improvement': round((b_acc - a_acc) * 100, 2) if a_acc > 0 else None,
            'is_significant': winner is not None
        }

    def run_historical_test(self, test_name: str = 'v1_vs_v2') -> Dict:
        """Run the A/B test over the last 1000 rows of historical data.

        Loads training_data.csv, replays each completed match through both
        predictors, records correctness per variant, persists the summary
        JSON to AB_RESULTS_DIR, and returns it. Returns an {'error': ...}
        dict on any failure.
        """
        self.create_test(test_name, 'predictor_v1', 'enhanced_predictor_v2')
        try:
            # pandas imported lazily so the module loads without it.
            import pandas as pd
            data_file = DATA_DIR / "training_data.csv"
            if not data_file.exists():
                return {'error': 'No training data available'}
            df = pd.read_csv(data_file)
            # Only completed matches have a usable ground-truth outcome.
            df = df.dropna(subset=['home_score', 'away_score'])
            # Use last 1000 matches for testing
            test_df = df.tail(1000)
            # Import predictors
            try:
                from src.predictor import PredictionEngine
                from src.enhanced_predictor_v2 import get_enhanced_predictor
                v1 = PredictionEngine()
                v2 = get_enhanced_predictor()
            except Exception as e:
                logger.error(f"Could not load predictors: {e}")
                return {'error': str(e)}
            for _, row in test_df.iterrows():
                home = row['home_team']
                away = row['away_team']
                # Ground-truth outcome from the final score.
                if row['home_score'] > row['away_score']:
                    actual = 'Home Win'
                elif row['home_score'] < row['away_score']:
                    actual = 'Away Win'
                else:
                    actual = 'Draw'
                # V1 prediction — per-row failures are best-effort: log at
                # debug and continue (was a bare `except:`, which also
                # swallowed KeyboardInterrupt/SystemExit).
                try:
                    v1_pred = v1.predict_match({'home_team': {'name': home}, 'away_team': {'name': away}})
                    v1_outcome = v1_pred.get('prediction', {}).get('predicted_outcome', '')
                    self.record_prediction(test_name, 'A', v1_outcome == actual)
                except Exception as e:
                    logger.debug(f"v1 prediction failed: {e}")
                # V2 prediction — same best-effort policy.
                try:
                    v2_pred = v2.predict(home, away)
                    v2_outcome = v2_pred.get('final_prediction', {}).get('predicted_outcome', '')
                    self.record_prediction(test_name, 'B', v2_outcome == actual)
                except Exception as e:
                    logger.debug(f"v2 prediction failed: {e}")
            # Persist the summary for later inspection.
            results = self.get_results(test_name)
            with open(AB_RESULTS_DIR / f"{test_name}.json", 'w') as f:
                json.dump(results, f, indent=2)
            return results
        except Exception as e:
            logger.error(f"A/B test error: {e}")
            return {'error': str(e)}
# Global instance (created lazily on first access)
_tester = None


def get_ab_tester() -> ABTester:
    """Return the module-wide ABTester, constructing it on first call."""
    global _tester
    tester = _tester
    if tester is None:
        tester = _tester = ABTester()
    return tester
def run_ab_test(test_name: str = 'v1_vs_v2'):
    """Convenience wrapper: run the historical A/B test on the shared tester."""
    tester = get_ab_tester()
    return tester.run_historical_test(test_name)
def get_ab_results(test_name: str):
    """Convenience wrapper: fetch results for a test from the shared tester."""
    tester = get_ab_tester()
    return tester.get_results(test_name)