File size: 2,331 Bytes
01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 8927482 01c71d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import os
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
# --- CONFIGURATION ---
# Both paths are resolved relative to the directory that contains this script,
# so the script behaves the same regardless of the current working directory.
_SCRIPT_DIR = os.path.dirname(__file__)

# CSV read by train_model() as the training set.
DATA_FILE = os.path.join(
    _SCRIPT_DIR, '..', 'data', 'thunderbird_market_trends.csv'
)

# Destination of the fitted pipeline written by train_model().
MODEL_OUTPUT_FILE = os.path.join(
    _SCRIPT_DIR, '..', 'models', 'thunderbird_market_predictor_v1.joblib'
)
def train_model():
    """Train the Thunderbird market predictor and save it to disk.

    Steps performed:

    1. Read the CSV at ``DATA_FILE`` into a DataFrame.
    2. Parse the ``month`` column as datetimes and derive ``month_of_year``.
    3. Fit a pipeline (one-hot encoding of ``niche`` + random forest
       regressor) on ``niche``, ``trend_score`` and ``month_of_year`` to
       predict ``successful_campaigns``.
    4. Print the mean squared error measured on the training data itself.
    5. Dump the fitted pipeline (preprocessor + model) to
       ``MODEL_OUTPUT_FILE`` with joblib, creating the target directory
       first if it does not exist.

    Returns:
        None. If the training CSV is missing, an error message is printed
        and the function returns early without training or saving anything.
    """
    print("--- Starting Thunderbird Market Predictor Training ---")

    # 1. Load Data
    # Keep the try body limited to the call that can raise FileNotFoundError.
    try:
        df = pd.read_csv(DATA_FILE)
    except FileNotFoundError:
        print(f"❌ ERROR: Training data not found at {DATA_FILE}. Run the export script first.")
        return
    print(f"✅ Data loaded successfully. Shape: {df.shape}")

    # 2. Preprocessing & Feature Engineering
    df['month'] = pd.to_datetime(df['month'])
    df['month_of_year'] = df['month'].dt.month

    X = df[['niche', 'trend_score', 'month_of_year']]
    y = df['successful_campaigns']

    # 3. Create a preprocessing pipeline for categorical features
    categorical_features = ['niche']
    preprocessor = ColumnTransformer(
        transformers=[
            # handle_unknown='ignore' lets the saved pipeline accept niches
            # that were not present in the training data.
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ],
        remainder='passthrough',  # Keep other columns (trend_score, month_of_year)
    )

    # 4. Define the model
    model = RandomForestRegressor(n_estimators=100, random_state=42, min_samples_leaf=2)

    # 5. Create the full pipeline
    pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                               ('regressor', model)])

    # 6. Train the model
    print("🚀 Training the model...")
    pipeline.fit(X, y)
    print("✅ Model training complete.")

    # 7. Evaluate the model (optional)
    # NOTE: this is the error on the same rows the model was fitted on, so it
    # is an optimistic figure and not an estimate of generalization error.
    predictions = pipeline.predict(X)
    mse = mean_squared_error(y, predictions)
    print(f" - Model Evaluation (MSE on training data): {mse:.2f}")

    # 8. Save the entire pipeline (preprocessor + model)
    # joblib.dump does not create missing parent directories, so make sure
    # the output directory exists before writing.
    output_dir = os.path.dirname(MODEL_OUTPUT_FILE)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    joblib.dump(pipeline, MODEL_OUTPUT_FILE)
    print(f"\n✅ Success! Trained model saved to: {MODEL_OUTPUT_FILE}")
if __name__ == "__main__":
    # Run training only when this file is executed as a script, not on import.
    train_model()