Arpit-Bansal's picture
self-train service prototype added
0162f5e
"""
Test ensemble model training and prediction
"""
import sys
from pathlib import Path
# Make the repository root (two levels above this file) importable so the
# SelfTrainService / DataService packages below can be found.
# The path is resolved to an absolute one so the sys.path entry keeps working
# even if __file__ is relative and the working directory changes later.
parent_dir = str(Path(__file__).resolve().parent.parent)
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)
from SelfTrainService.config import CONFIG
from SelfTrainService.trainer import ModelTrainer
from SelfTrainService.data_store import ScheduleDataStore
from SelfTrainService.feature_extractor import FeatureExtractor
from DataService.metro_data_generator import MetroDataGenerator
from DataService.schedule_optimizer import MetroScheduleOptimizer
def test_config():
    """Print the main settings exposed by ``CONFIG`` as a sanity check.

    Reads ``MODEL_TYPES``, ``USE_ENSEMBLE``, ``RETRAIN_INTERVAL_HOURS`` and
    ``FEATURES`` from the shared configuration object. A missing attribute
    surfaces as an ``AttributeError`` raised to the caller.
    """
    print("Testing Configuration...")
    print(f" Model Types: {CONFIG.MODEL_TYPES}")
    print(f" Use Ensemble: {CONFIG.USE_ENSEMBLE}")
    print(f" Retrain Interval: {CONFIG.RETRAIN_INTERVAL_HOURS} hours")
    print(f" Features: {len(CONFIG.FEATURES)} features")
    print(" ✓ Config OK")
def test_model_initialization():
    """Instantiate every configured model type and report the outcome.

    Calls ``ModelTrainer._get_model`` once per entry in
    ``CONFIG.MODEL_TYPES``. A ``None`` result is reported as a failure for
    that model. The closing summary only claims success when every model
    could be created; otherwise it lists the models that could not.
    """
    print("\nTesting Model Initialization...")
    trainer = ModelTrainer()
    failed = []
    for model_name in CONFIG.MODEL_TYPES:
        model = trainer._get_model(model_name)
        if model is None:
            failed.append(model_name)
            print(f" ✗ {model_name}: Failed to initialize")
        else:
            print(f" ✓ {model_name}: {type(model).__name__}")

    # Do not report "OK" when at least one model could not be built.
    if failed:
        names = ", ".join(str(name) for name in failed)
        print(f" ✗ Model initialization incomplete: {names}")
    else:
        print(" ✓ Model initialization OK")
def test_data_generation():
    """Generate synthetic metro data and optimize one schedule from it.

    Builds a route and train health statuses with ``MetroDataGenerator``,
    feeds them to ``MetroScheduleOptimizer`` for today's date, and prints
    the number of trainsets and the total number of service blocks in the
    resulting schedule.
    """
    print("\nTesting Data Generation...")
    from datetime import datetime

    num_trains = 30
    generator = MetroDataGenerator(num_trains=num_trains)
    route = generator.generate_route()
    train_health = generator.generate_train_health_statuses()

    optimizer = MetroScheduleOptimizer(
        date=datetime.now().strftime("%Y-%m-%d"),
        num_trains=num_trains,
        route=route,
        train_health=train_health,
    )
    schedule = optimizer.optimize_schedule()

    total_blocks = sum(len(t.service_blocks) for t in schedule.trainsets)
    print(f" Generated schedule with {len(schedule.trainsets)} trains")
    print(f" Total service blocks: {total_blocks}")
    print(" ✓ Data generation OK")
def test_feature_extraction():
    """Extract features and a quality score from a freshly built schedule.

    Generates a 30-train schedule, converts it to a dict with
    ``model_dump()``, then passes that dict to
    ``FeatureExtractor.extract_from_schedule`` and
    ``FeatureExtractor.calculate_target``. Prints the feature count, the
    first five feature names, and the quality score.
    """
    print("\nTesting Feature Extraction...")
    from datetime import datetime

    num_trains = 30
    generator = MetroDataGenerator(num_trains=num_trains)
    route = generator.generate_route()
    train_health = generator.generate_train_health_statuses()

    optimizer = MetroScheduleOptimizer(
        date=datetime.now().strftime("%Y-%m-%d"),
        num_trains=num_trains,
        route=route,
        train_health=train_health,
    )
    feature_extractor = FeatureExtractor()

    schedule = optimizer.optimize_schedule()
    schedule_dict = schedule.model_dump()

    features = feature_extractor.extract_from_schedule(schedule_dict)
    print(f" Extracted {len(features)} features")
    print(f" Feature names: {list(features.keys())[:5]}...")

    quality = feature_extractor.calculate_target(schedule_dict)
    print(f" Quality score: {quality:.2f}")
    print(" ✓ Feature extraction OK")
def test_training():
    """Store a small batch of schedules and run the training pipeline once.

    Generates 20 schedules (fleet sizes 25 through 44), saves each one
    through ``ScheduleDataStore.save_schedule``, then calls
    ``ModelTrainer.train(force=True)``. The result is expected to expose a
    ``"success"`` flag plus either ``"models_trained"`` / ``"best_model"``
    or a ``"reason"`` for skipping. Both outcomes count as a pass here,
    because the goal is to exercise the pipeline end to end.
    """
    print("\nTesting Model Training...")
    from datetime import datetime

    # Generate small dataset
    # NOTE(review): save_schedule presumably persists to the real data store;
    # confirm that running this check does not pollute production training data.
    data_store = ScheduleDataStore()
    print(" Generating 20 sample schedules...")
    for num_trains in range(25, 45):
        generator = MetroDataGenerator(num_trains=num_trains)
        route = generator.generate_route()
        train_health = generator.generate_train_health_statuses()
        optimizer = MetroScheduleOptimizer(
            date=datetime.now().strftime("%Y-%m-%d"),
            num_trains=num_trains,
            route=route,
            train_health=train_health,
        )
        schedule = optimizer.optimize_schedule()
        data_store.save_schedule(schedule.model_dump())

    # Training may be skipped when there is too little data; either way the
    # call below runs the whole pipeline.
    trainer = ModelTrainer()
    result = trainer.train(force=True)
    if result["success"]:
        print(" ✓ Training successful")
        print(f" Models: {result['models_trained']}")
        print(f" Best: {result['best_model']}")
    else:
        print(f" ⓘ Training skipped: {result['reason']}")
        print(" (This is expected with small dataset)")
    print(" ✓ Training pipeline OK")
def test_prediction():
    """Load a previously trained model and run two sample predictions.

    When ``ModelTrainer.load_model()`` reports that no model is available,
    a hint is printed and the check ends without predicting. Otherwise one
    fixed feature dict is scored twice via ``ModelTrainer.predict``: once
    with ``use_ensemble=True`` and once with ``use_ensemble=False``. Each
    call is expected to return a ``(prediction, confidence)`` pair.
    """
    print("\nTesting Model Prediction...")
    trainer = ModelTrainer()

    # Try to load existing model; nothing to predict with if there is none.
    if not trainer.load_model():
        print(" ⓘ No trained model available (run train_model.py first)")
        return

    print(" ✓ Loaded existing model")

    # Hand-written sample input for the prediction calls below.
    test_features = {
        "num_trains": 30,
        "num_available": 28,
        "avg_readiness_score": 85.0,
        "total_mileage": 150000,
        "mileage_variance": 5000,
        "maintenance_count": 3,
        "certificate_expiry_count": 1,
        "branding_priority_sum": 15,
        "time_of_day": 12,
        "day_of_week": 3,
    }

    prediction, confidence = trainer.predict(test_features, use_ensemble=True)
    print(f" Ensemble Prediction: {prediction:.2f}")
    print(f" Confidence: {confidence:.2f}")

    prediction_single, confidence_single = trainer.predict(
        test_features, use_ensemble=False
    )
    print(f" Single Model Prediction: {prediction_single:.2f}")
    print(f" Confidence: {confidence_single:.2f}")
    print(" ✓ Prediction OK")
def main():
    """Run all checks in order and report the overall result.

    The checks run sequentially, and the first one that raises aborts the
    run. The error and its traceback are printed instead of propagating.

    Returns:
        int: ``0`` when every check completed, ``1`` when a check raised,
        so the script's exit status reflects failure.
    """
    import traceback

    banner = "=" * 60
    print(banner)
    print("Ensemble Model System Tests")
    print(banner)
    try:
        test_config()
        test_model_initialization()
        test_data_generation()
        test_feature_extraction()
        test_training()
        test_prediction()
    except Exception as e:
        # Top-level boundary: report the failure and signal it to the shell.
        print(f"\n✗ Test failed with error: {e}")
        traceback.print_exc()
        return 1
    else:
        print("\n" + banner)
        print("All Tests Completed!")
        print(banner)
        print("\nNext Steps:")
        print("1. Install remaining dependencies: pip install -r requirements.txt")
        print("2. Generate training data: python SelfTrainService/train_model.py")
        print("3. Start retraining service: python SelfTrainService/start_retraining.py")
        return 0


if __name__ == "__main__":
    # Propagate main()'s status so CI and shell callers can detect failures.
    sys.exit(main())