File size: 2,910 Bytes
0914e96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File: ai-service/training/train_performance_predictor.py
import pandas as pd
import xgboost as xgb
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import joblib
import os
import sys
# Add the project root directory to sys.path so sibling packages
# (e.g. ai-service modules next to training/) can be imported.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_DIR)
def _build_pipeline(categorical_features, **xgb_kwargs):
    """Assemble a fresh one-hot-encoding + XGBoost regression pipeline.

    A new ColumnTransformer is built per call so the two trained pipelines
    never share a mutable preprocessor instance (sklearn Pipelines do not
    clone their steps on fit).

    Args:
        categorical_features: Column names to one-hot encode; remaining
            columns are passed through unchanged.
        **xgb_kwargs: Extra keyword arguments forwarded to XGBRegressor
            (e.g. learning_rate).

    Returns:
        An unfitted sklearn Pipeline: preprocessing followed by regression.
    """
    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ],
        remainder='passthrough',
    )
    return Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', xgb.XGBRegressor(
            objective='reg:squarederror',
            n_estimators=100,
            random_state=42,
            **xgb_kwargs,
        )),
    ])


def _train_and_save(pipeline, X, y, label, model_path):
    """Fit *pipeline* on (X, y) and persist it to *model_path* with joblib.

    Creates the destination directory if it does not exist, so a fresh
    checkout without a models/ folder does not crash on dump.
    """
    print(f"\n--- Training {label} Model ---")
    pipeline.fit(X, y)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(pipeline, model_path)
    print(f"{label} model saved to: {model_path}")


def train_performance_models():
    """
    Loads the performance data CSV and trains two separate models:
    one for predicting 'likes' and one for predicting 'comments'.

    Reads data/performance_training_data.csv under ROOT_DIR and writes
    models/likes_predictor_v1.joblib and models/comments_predictor_v1.joblib.
    Aborts (with a log message, no exception) when the CSV is missing or empty.
    """
    print("--- Starting Performance Predictor Model Training ---")
    data_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv')
    try:
        df = pd.read_csv(data_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"ERROR: Data file not found or is empty at {data_path}")
        return
    print(f"Loaded {len(df)} rows from {data_path}")
    if df.empty:
        # A file with only headers parses fine but cannot train a model.
        print("WARNING: CSV file is empty. Aborting model training.")
        return

    # Features (X): the columns used to estimate likes/comments.
    features = ['follower_count', 'caption_length', 'campaign_niche', 'content_format']
    X = df[features]

    # Categorical columns one-hot encoded by the preprocessing step.
    categorical_features = ['campaign_niche', 'content_format']
    models_dir = os.path.join(ROOT_DIR, 'models')

    # ---- Model #1: Likes Predictor ----
    _train_and_save(
        _build_pipeline(categorical_features),
        X, df['likes'],
        label='Likes Predictor',
        model_path=os.path.join(models_dir, 'likes_predictor_v1.joblib'),
    )

    # ---- Model #2: Comments Predictor ----
    # NOTE(review): original used learning_rate=0.05 only for this model;
    # preserved as-is.
    _train_and_save(
        _build_pipeline(categorical_features, learning_rate=0.05),
        X, df['comments'],
        label='Comments Predictor',
        model_path=os.path.join(models_dir, 'comments_predictor_v1.joblib'),
    )

    print("\nAll performance models trained and saved successfully!")
# Script entry point: train and persist both performance models.
if __name__ == '__main__':
    train_performance_models()