# reachify-ai-service/core/predictor.py
# amitbhatt6075's picture
# Fix crash: Update scikit-learn version and safe predictor loading
# b901ed7
import joblib
import pandas as pd
import numpy as np
from typing import List, Dict
import traceback
print(">>> Loading ai-service/core/predictor.py (Version: FINAL, SAFE LOAD)")


# --- SAFE MODEL LOADING ---
# A broad `Exception` is caught on purpose so that a version mismatch or a
# missing model file does not crash the server at import time.
def _load_pipeline_or_none(display_name, model_path):
    """
    Load a joblib artifact and return it, or return None if loading fails.

    Progress, success and failure are reported with print(); a failure is
    reported as a warning and never propagates to the caller.
    """
    pipeline = None
    try:
        print(f"--- Loading {display_name} Model... ---")
        # NOTE: joblib.load unpickles the file, so only trusted artifacts
        # should ever be placed at this path.
        pipeline = joblib.load(model_path)
        print(f"✅ {display_name} loaded successfully.")
    except Exception as e:
        # Continue running, don't crash.
        print(f"⚠️ Warning: {display_name} model failed to load: {e}")
    return pipeline


# Both globals stay None when their model could not be loaded; the functions
# below check for None before predicting.
influencer_pipeline = _load_pipeline_or_none(
    'Influencer Matcher', 'models/influencer_matcher_v1.joblib'
)
performance_pipeline = _load_pipeline_or_none(
    'Performance Predictor', 'models/performance_predictor_v1.joblib'
)
# --- LOGIC ---
def rank_influencers_by_match(influencers: List[Dict], campaign_details: Dict, top_n: int = 5) -> List[Dict]:
    """
    Rank influencers by predicted match score and return the best `top_n`.

    Each influencer's `category` and `bio` are combined with campaign-level
    values (`niche`, `country`, `followers`, `engagement_rate`) and passed to
    the module-level `influencer_pipeline`. Records are then sorted by the
    predicted score, highest first.

    Args:
        influencers: Influencer records as dicts. `None` is treated like an
            empty list.
        campaign_details: Campaign attributes. The keys read are `category`,
            `location`, `followers` and `engagement_rate`; each has a default.
        top_n: Maximum number of records to return.

    Returns:
        On success, up to `top_n` dicts limited to the keys `id`, `name`,
        `handle`, `followers`, `category` and `bio` (whichever are present),
        best match first.
        As a fallback (model not loaded, records lacking `category`/`bio`, or
        any error during prediction), the first `top_n` input records
        unchanged and in their original order.
        An empty list when no influencers were provided.
    """
    # 1. No model or no data: skip prediction instead of crashing. A missing
    #    (None) or empty input yields an empty list rather than being sliced.
    if not influencers or influencer_pipeline is None:
        print("ℹ️ Matcher skipped: No model or no influencers provided.")
        return influencers[:top_n] if influencers else []

    try:
        print(f"--- Predictor: Ranking {len(influencers)} influencers...")

        # Prepare the DataFrame.
        influencer_df = pd.DataFrame(influencers)

        # Safe feature extraction: both per-influencer columns must exist,
        # otherwise fall back without relying on a KeyError.
        required_cols = ['category', 'bio']
        missing_cols = [col for col in required_cols if col not in influencer_df.columns]
        if missing_cols:
            print(f"ℹ️ Matcher skipped: influencer records lack {missing_cols}.")
            return influencers[:top_n]

        features = influencer_df[required_cols].copy()
        if features.empty:
            return influencers[:top_n]

        features['niche'] = campaign_details.get('category', 'General')
        features['country'] = campaign_details.get('location', 'USA')
        # NOTE(review): followers and engagement_rate are read from
        # campaign_details, so every row receives the same value. Confirm this
        # matches how the model was trained before changing it.
        features['followers'] = campaign_details.get('followers', 10000)
        features['engagement_rate'] = campaign_details.get('engagement_rate', 0.03)

        # Prediction.
        influencer_df['match_score'] = influencer_pipeline.predict(features)

        # Sorting: highest score first, keep only the top_n rows.
        top_influencers_df = influencer_df.sort_values(by='match_score', ascending=False).head(top_n)

        # Clean up the result: expose only the known public columns that exist.
        result_cols = ['id', 'name', 'handle', 'followers', 'category', 'bio']
        final_cols = [col for col in result_cols if col in top_influencers_df.columns]
        return top_influencers_df[final_cols].to_dict(orient='records')

    except Exception as e:
        # Best-effort service: on any error return the original list's head.
        print(f"❌ Ranking Error: {e}")
        traceback.print_exc()
        return influencers[:top_n]
def predict_performance(data: dict) -> int:
    """
    Predict campaign performance for a single set of campaign attributes.

    `data` is turned into a one-row DataFrame and passed to the module-level
    `performance_pipeline`; the first prediction is returned as an int.
    When the pipeline was not loaded, or anything fails while predicting,
    the default/safe value 50000 is returned instead.
    """
    safe_default = 50000  # Default/safe value

    model = performance_pipeline
    if model is None:
        return safe_default

    try:
        single_row = pd.DataFrame(data, index=[0])
        predicted = model.predict(single_row)
        outcome = int(predicted[0])
    except Exception as e:
        print(f"❌ Performance Prediction Error: {e}")
        return safe_default
    return outcome