Auto_ML / backend /core /recommendations.py
abhiraj12's picture
Initial commit
2c29579
# ── Smart Recommendations ─────────────────────────────────────────────────────
def generate_recommendations(profile: dict, results: dict) -> list:
"""
Return a prioritised list of actionable data science recommendations.
Each item: {priority, icon, title, detail, action}
"""
recs = []
# ✅ FIX 1: safe defaults
col_stats = profile.get("column_stats") or {}
shap = results.get("shap_summary") or {}
is_clf = results.get("is_classification", True)
# ✅ FIX 2: safe score
try:
score = float(results.get("score", 0))
except Exception:
score = 0
rows = profile.get("rows", 0)
# ── Missing values ────────────────────────────────────────────────────
for col, stats in col_stats.items():
if not isinstance(stats, dict):
continue
mp = stats.get("missing_pct", 0)
if mp > 50:
recs.append({
"priority": "🔴 Critical",
"title": f"Drop column '{col}'",
"detail": f"{mp}% of values are missing — this column adds more noise than signal.",
"action": f"df.drop(columns=['{col}'], inplace=True)",
})
elif mp > 20:
recs.append({
"priority": "🟡 Important",
"title": f"Review imputation for '{col}'",
"detail": f"{mp}% missing. Consider domain-specific fills or median/mode by group.",
"action": f"df['{col}'].fillna(df['{col}'].median(), inplace=True)",
})
# ── Zero variance ─────────────────────────────────────────────────────
zero_var = [
c for c, s in col_stats.items()
if isinstance(s, dict) and s.get("unique", 99) <= 1
]
for col in zero_var:
recs.append({
"priority": "🔴 Critical",
"title": f"Drop constant column '{col}'",
"detail": "This column has only one unique value and contributes nothing to the model.",
"action": f"df.drop(columns=['{col}'], inplace=True)",
})
# ── High skew ─────────────────────────────────────────────────────────
for col, stats in col_stats.items():
if not isinstance(stats, dict):
continue
skew = stats.get("skew")
if skew is not None:
try:
skew_val = float(skew)
if abs(skew_val) > 5:
recs.append({
"priority": "🟡 Important",
"title": f"Log-transform skewed feature '{col}'",
"detail": f"Skewness = {skew_val:.2f}. Heavy skew hurts linear models and slows tree convergence.",
"action": f"import numpy as np\ndf['{col}'] = np.log1p(df['{col}'].clip(lower=0))",
})
except (ValueError, TypeError):
continue
# ── Class imbalance ───────────────────────────────────────────────────
if profile.get("imbalance") == "High ⚠️" and is_clf:
recs.append({
"priority": "🔴 Critical",
"title": "Address class imbalance with SMOTE",
"detail": "Your target classes are severely skewed. Minority class recall will be poor.",
"action": "from imblearn.over_sampling import SMOTE\nX_res, y_res = SMOTE().fit_resample(X, y)",
})
# ── Small dataset ─────────────────────────────────────────────────────
if rows < 500:
recs.append({
"priority": "🟡 Important",
"title": "Expand your dataset with Synthetic Data",
"detail": f"Only {rows} rows detected. Use the Synthetic Generator on the AI Tools page.",
"action": "Go to 8_AI_Tools → Synthetic Data Generator",
})
# ── Low score suggestions ─────────────────────────────────────────────
if score < 70:
recs.append({
"priority": "🟡 Important",
"title": "Try feature engineering",
"detail": f"Score is {score}%. Consider creating interaction terms or polynomial features.",
"action": "from sklearn.preprocessing import PolynomialFeatures\npf = PolynomialFeatures(degree=2, interaction_only=True)",
})
if rows < 2000:
recs.append({
"priority": "🟡 Important",
"title": "Collect more training data",
"detail": "Score under 70% with a small dataset usually means the model is data-hungry.",
"action": "Focus data collection on the features with highest SHAP importance.",
})
# ── SHAP-based feature pruning ────────────────────────────────────────
if isinstance(shap, dict) and shap:
num_cols = profile.get("num_cols") or []
cat_cols = profile.get("cat_cols") or []
all_features = set(num_cols + cat_cols)
important_features = set(shap.keys())
unimportant = all_features - important_features
if len(unimportant) >= 3:
sample = list(unimportant)[:5]
recs.append({
"priority": "🟢 Suggestion",
"title": f"Consider dropping {len(unimportant)} low-importance features",
"detail": f"SHAP analysis shows these features contribute minimally: {', '.join(sample)}",
"action": f"df.drop(columns={sample[:3]}, inplace=True)",
})
# ── Positive confirmation ────────────────────────────────────────────
if score >= 85 and not recs:
recs.append({
"priority": "✅ Excellent",
"title": "Your pipeline is well-optimised!",
"detail": f"Score of {score}% with a clean dataset. Consider deploying the model.",
"action": "Download the export bundle and run the generated API.",
})
# Sort: Critical first
order = {"🔴 Critical": 0, "🟡 Important": 1, "🟢 Suggestion": 2, "✅ Excellent": 3}
recs.sort(key=lambda r: order.get(r["priority"], 9))
return recs