import os
from pathlib import Path

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

print("Training script started...")

# Load the training examples. The path is relative, so it is resolved against
# the directory the script is launched from.
df = pd.read_csv('data/sample_data.csv')

# Split the frame into the regression target and the remaining columns, which
# are handed to the preprocessing step as candidate features.
target_column = 'match_score'
y = df[target_column]
X = df.drop(columns=[target_column])

# Column groups routed through the preprocessing step.
numeric_features = ['followers', 'engagement_rate']
categorical_features = ['niche', 'country']

# Categories not seen while fitting are ignored at prediction time instead of
# raising an error.
categorical_encoder = OneHotEncoder(handle_unknown='ignore')

# Numeric columns are forwarded unchanged; categorical columns are one-hot
# encoded.
column_transformers = [
    ('num', 'passthrough', numeric_features),
    ('cat', categorical_encoder, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_transformers)

# Fixed random_state keeps the fitted forest reproducible between runs.
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Chain preprocessing and the regressor so both are fitted and saved together.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', model),
]
pipeline = Pipeline(steps=pipeline_steps)

# Fit the preprocessing and regression steps together on the loaded data.
pipeline.fit(X, y)
print("Model training complete.")

# Destination of the serialized pipeline (relative to the working directory).
model_path = 'models/influencer_matcher_v1.joblib'

# Create the output directory derived from model_path so the two cannot drift
# apart. exist_ok=True replaces the previous exists()-then-makedirs() pair,
# which could fail if the directory appeared between the check and the call.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)

# Persist the whole fitted pipeline (preprocessor + regressor) as one artifact.
joblib.dump(pipeline, model_path)
print(f"Model successfully saved to {model_path}")