"""Train the performance-scoring regression model and save it to disk.

Reads the sample training CSV from ``../data`` (relative to this file), fits
a ``RandomForestRegressor`` on the feature columns listed below, prints the
R^2 score measured on a held-out 20% split, and writes the fitted model to
``../models`` with joblib.

The process exits with a non-zero status when the training data file is
missing or does not contain the required columns.
"""
import os
import sys

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

print("Starting model training...")

# Renamed: the file name was changed to avoid a naming conflict.
DATA_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'sample_performance_training_data.csv')
MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'performance_scorer_v1.joblib')

# Create the model directory if it does not exist yet.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# 1. Load the data.
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    print(f"ERROR: Training data file not found at {DATA_PATH}", file=sys.stderr)
    print(
        "Please ensure 'sample_performance_training_data.csv' exists in the 'ai-service/data' directory.",
        file=sys.stderr,
    )
    # sys.exit(1) rather than the site-provided exit(): exit() is not
    # guaranteed to be defined, and called without an argument it reports
    # success (status 0) even though training did not happen.
    sys.exit(1)

# 2. Define the features (X) and the target (y).
features = ['avg_engagement_rate', 'on_time_submission_rate', 'avg_brand_rating', 'monthly_earnings']
target = 'performance_score'

# Fail with a readable message instead of a raw KeyError traceback when the
# CSV lacks any column the model needs.
missing_columns = [column for column in features + [target] if column not in df.columns]
if missing_columns:
    print(
        f"ERROR: Training data at {DATA_PATH} is missing required column(s): {missing_columns}",
        file=sys.stderr,
    )
    sys.exit(1)

X = df[features]
y = df[target]

# 3. Split the data into training and testing sets (80/20, fixed seed so the
#    split is reproducible between runs).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Initialize and train the model.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 5. Evaluate the model on the held-out split (optional but good practice).
#    Note: `accuracy` holds the R^2 score, not a classification accuracy.
y_pred = model.predict(X_test)
accuracy = r2_score(y_test, y_pred)
print(f"Model trained successfully! R^2 Score: {accuracy:.2f}")

# 6. Save the trained model to a file.
joblib.dump(model, MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")