Spaces:
Build error
Build error
| import os | |
| import sys | |
| import pickle | |
| import pandas as pd | |
| from datetime import datetime | |
| # Add project root to path | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | |
| from src.pipeline import RossmannPipeline | |
| from src.core import setup_logger | |
| logger = setup_logger(__name__) | |
def _encode_store_categoricals(df_feat):
    """Replace the letter-coded StoreType/Assortment columns with numbers, in place."""
    # NOTE: per the original author's comment these mappings must stay
    # consistent with the encoding applied in app.py at inference time.
    encodings = {
        'StoreType': {'a': 1, 'b': 2, 'c': 3, 'd': 4},
        'Assortment': {'a': 1, 'b': 2, 'c': 3},
    }
    for column, mapping in encodings.items():
        if column in df_feat.columns:
            # Values outside the mapping (including missing ones) become 0.
            df_feat[column] = df_feat[column].astype(str).map(mapping).fillna(0)
    return df_feat


def _select_feature_columns(df_feat):
    """Return the ordered model inputs: fixed base columns plus any fourier/easter columns."""
    base_cols = [
        'Store', 'DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday',
        'Year', 'Month', 'Day', 'IsWeekend', 'DayOfMonth',
        'CompetitionDistance', 'StoreType', 'Assortment',
    ]
    seasonal_cols = [c for c in df_feat.columns if 'fourier' in c or 'easter' in c]
    return base_cols + seasonal_cols


def run_production_training():
    """
    Executes a formal production training run.

    Ingests ``data/raw/train.csv`` (resolved against the current working
    directory), runs feature engineering, trains the pipeline's model on every
    available row, pickles it to ``models/rossmann_production_model.pkl`` and
    records the run through the pipeline's experiment tracker.

    Returns:
        None. If the raw CSV is absent, an error is logged and the function
        returns without training.

    Raises:
        KeyError: if the engineered frame lacks a required feature column or
            the ``target`` column.
    """
    train_csv = os.path.abspath("data/raw/train.csv")
    if not os.path.exists(train_csv):
        logger.error(f"Raw data not found at {train_csv}. Please ensure data is present.")
        return

    logger.info("Initializing Production Training Pipeline...")
    pipeline = RossmannPipeline(train_csv)

    # 1. Ingest Full Dataset
    logger.info("Ingesting full dataset...")
    df_raw = pipeline.ingestor.ingest(train_csv)

    # 2. Feature Engineering
    logger.info("Running feature engineering...")
    df_feat = pipeline.run_feature_engineering(df_raw)

    # 3. Define Final Feature Set
    df_feat = _encode_store_categoricals(df_feat)
    feature_cols = _select_feature_columns(df_feat)

    # Fail early with an explicit list instead of an opaque pandas indexing
    # error; the exception type (KeyError) is the same one pandas would raise.
    missing = [c for c in feature_cols + ['target'] if c not in df_feat.columns]
    if missing:
        logger.error(f"Feature engineering output is missing required columns: {missing}")
        raise KeyError(f"Missing required columns for training: {missing}")

    # 4. Final Training (using all available data to create the 'Gold' model)
    X = df_feat[feature_cols].fillna(0)
    y = df_feat['target']
    logger.info(f"Training final model on {len(df_feat)} records with {len(feature_cols)} features...")
    pipeline.train(X, y)

    # 5. Export Model & Metadata
    os.makedirs('models', exist_ok=True)
    model_path = 'models/rossmann_production_model.pkl'
    # Write to a sibling temp file and rename it into place so that a failed or
    # interrupted dump never leaves a truncated pickle at the production path.
    tmp_path = model_path + '.tmp'
    with open(tmp_path, 'wb') as f:
        pickle.dump(pipeline.model, f)
    os.replace(tmp_path, model_path)

    # Log the successful run
    pipeline.tracker.log_experiment(
        name="production_training_run",
        params={
            "feature_count": len(feature_cols),
            "data_size": len(df_feat),
            "model_type": str(type(pipeline.model)),
        },
        metrics={"status": "success"},
    )

    logger.info("--- PRODUCTION TRAINING COMPLETE ---")
    logger.info(f"Model saved to: {model_path}")
    logger.info("Project is now ready for deployment/inference.")
# Script entry point: start the production run only when this file is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    run_production_training()