Spaces:

amitbhatt6075
/

reachify-ai-service

Running

App Files Files Community

reachify-ai-service / training /train_thunderbird_market_predictor.py

amitbhatt6075

feat: Trained model with real Google Trends data

8927482 3 days ago

raw

history blame contribute delete

2.33 kB

	import os
	import pandas as pd
	import joblib
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.preprocessing import OneHotEncoder
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.metrics import mean_squared_error

	# --- CONFIGURATION ---
	# Both paths are built relative to the directory containing this script:
	# the training CSV lives under ../data and the model file under ../models.
	_SCRIPT_DIR = os.path.dirname(__file__)
	DATA_FILE = os.path.join(
	    _SCRIPT_DIR, '..', 'data', 'thunderbird_market_trends.csv'
	)
	MODEL_OUTPUT_FILE = os.path.join(
	    _SCRIPT_DIR, '..', 'models', 'thunderbird_market_predictor_v1.joblib'
	)

	def train_model():
	    """Train the Thunderbird market predictor and save it with joblib.

	    Reads the CSV at ``DATA_FILE``, derives a ``month_of_year`` feature from
	    the ``month`` column, and fits a pipeline (one-hot encoding of ``niche``
	    followed by a random-forest regressor) that predicts
	    ``successful_campaigns`` from ``niche``, ``trend_score`` and
	    ``month_of_year``. The MSE on the training data is printed and the
	    fitted pipeline is written to ``MODEL_OUTPUT_FILE``.

	    Returns:
	        None. If the training CSV does not exist, an error message is
	        printed and the function returns without training or saving.
	    """
	    print("--- Starting Thunderbird Market Predictor Training ---")

	    # 1. Load Data
	    try:
	        df = pd.read_csv(DATA_FILE)
	    except FileNotFoundError:
	        print(f"❌ ERROR: Training data not found at {DATA_FILE}. Run the export script first.")
	        return
	    print(f"✅ Data loaded successfully. Shape: {df.shape}")

	    # 2. Preprocessing & Feature Engineering
	    # Only the calendar month (1-12) of the 'month' column is used as a feature.
	    df['month'] = pd.to_datetime(df['month'])
	    df['month_of_year'] = df['month'].dt.month

	    X = df[['niche', 'trend_score', 'month_of_year']]
	    y = df['successful_campaigns']

	    # 3. Create a preprocessing pipeline for categorical features
	    categorical_features = ['niche']
	    preprocessor = ColumnTransformer(
	        transformers=[
	            # handle_unknown='ignore' keeps prediction from failing on a
	            # niche value that was not present in the training data.
	            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
	        ],
	        remainder='passthrough',  # Keep other columns (trend_score, month_of_year)
	    )

	    # 4. Define the model
	    model = RandomForestRegressor(n_estimators=100, random_state=42, min_samples_leaf=2)

	    # 5. Create the full pipeline
	    pipeline = Pipeline(steps=[
	        ('preprocessor', preprocessor),
	        ('regressor', model),
	    ])

	    # 6. Train the model
	    print("🚀 Training the model...")
	    pipeline.fit(X, y)
	    print("✅ Model training complete.")

	    # 7. Evaluate the model (optional)
	    # NOTE: this is the error on the same rows the model was fitted on, so it
	    # is an optimistic figure rather than an estimate of held-out performance.
	    predictions = pipeline.predict(X)
	    mse = mean_squared_error(y, predictions)
	    print(f" - Model Evaluation (MSE on training data): {mse:.2f}")

	    # 8. Save the entire pipeline (preprocessor + model)
	    # joblib.dump does not create missing parent directories; create the
	    # output folder first so a checkout without it does not fail only after
	    # training has already completed.
	    output_dir = os.path.dirname(MODEL_OUTPUT_FILE)
	    if output_dir:
	        os.makedirs(output_dir, exist_ok=True)
	    joblib.dump(pipeline, MODEL_OUTPUT_FILE)
	    print(f"\n✅ Success! Trained model saved to: {MODEL_OUTPUT_FILE}")

	# Script entry point: train only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    train_model()