# app.py
import os
import re
import warnings

import gradio as gr
import joblib
import numpy as np


# -------------------------
# Helper: safe-loading
# -------------------------
def try_load(path_options):
    """Load the first candidate artifact that exists and deserializes.

    Args:
        path_options: Iterable of candidate file paths. ``None`` entries are
            skipped.

    Returns:
        A ``(model, path)`` tuple for the first candidate that exists on disk
        and loads without raising, or ``(None, None)`` when no candidate
        could be loaded.
    """
    for candidate in path_options:
        if candidate is None:
            continue
        if not os.path.exists(candidate):
            continue
        try:
            # SECURITY NOTE: joblib.load is pickle-based and can execute
            # arbitrary code while loading. Only point the candidate lists
            # at artifacts you trust.
            model = joblib.load(candidate)
        except Exception as exc:
            # Best effort: report the failure and fall through to the next
            # candidate instead of aborting start-up.
            print(f"Failed to load {candidate}: {exc}")
        else:
            print(f"Loaded: {candidate}")
            return model, candidate
    return None, None


# Directory containing this file; falls back to the current working directory
# when __file__ is not defined. abspath() keeps ROOT usable even when
# __file__ is a bare relative name such as "app.py".
ROOT = (
    os.path.dirname(os.path.abspath(__file__))
    if "__file__" in globals()
    else os.getcwd()
)
MODEL_DIR = os.path.join(ROOT, "models")

# try multiple plausible names/locations (first loadable match wins)
tfidf_candidates = [
    os.path.join(MODEL_DIR, "tfidf_vectorizer.pkl"),
    os.path.join(MODEL_DIR, "tfidf.pkl"),
    os.path.join(ROOT, "tfidf_vectorizer.pkl"),
    os.path.join(ROOT, "tfidf.joblib"),
    os.path.join(MODEL_DIR, "tfidf_vectorizer.joblib"),
    os.path.join(MODEL_DIR, "tfidf.joblib"),
]
logreg_candidates = [
    os.path.join(MODEL_DIR, "logreg_model.pkl"),
    os.path.join(MODEL_DIR, "logreg.pkl"),
    os.path.join(ROOT, "logreg_model.pkl"),
    os.path.join(ROOT, "logreg.pkl"),
    os.path.join(MODEL_DIR, "logreg.joblib"),
]
lgbm_candidates = [
    os.path.join(MODEL_DIR, "lgbm_model.pkl"),
    os.path.join(MODEL_DIR, "lgbm.pkl"),
    os.path.join(ROOT, "lgbm_model.pkl"),
    os.path.join(ROOT, "lgbm.pkl"),
]

# Each name is None when no candidate could be loaded; code that uses them
# has to check for None.
tfidf, tfidf_path = try_load(tfidf_candidates)
logreg, logreg_path = try_load(logreg_candidates)
lgbm, lgbm_path = try_load(lgbm_candidates)

# Fallback class labels for models that expose neither `classes_` nor
# `classes`.
DEFAULT_LABELS = ['negative', 'neutral', 'positive']

# -------------------------
# Text preprocessing
# -------------------------
_WHITESPACE_RUN = re.compile(r"\s+")
_DISALLOWED_CHARS = re.compile(r"[^a-z0-9\s']")


def clean_text(t):
    """Normalize raw input text before vectorization.

    Lower-cases the text, collapses whitespace runs to a single space, then
    replaces every character other than ``a-z``, ``0-9``, whitespace and the
    apostrophe with a space, and finally strips the ends.

    Args:
        t: Any value; ``None`` yields ``""`` and everything else is passed
            through ``str()``.

    Returns:
        The cleaned string (possibly empty).
    """
    if t is None:
        return ""
    lowered = str(t).lower()
    collapsed = _WHITESPACE_RUN.sub(" ", lowered)
    # NOTE(review): punctuation is replaced *after* whitespace collapsing, so
    # interior runs of spaces can remain (e.g. "a, b" -> "a  b"). The order is
    # kept as-is because the fitted vectorizer was presumably trained on text
    # cleaned the same way -- confirm before changing it.
    replaced = _DISALLOWED_CHARS.sub(" ", collapsed)
    return replaced.strip()


# -------------------------
# Prediction logic
# -------------------------
# Silence all warnings from this point on. The call deliberately stays after
# the model loading above, so warnings raised while loading are still shown.
warnings.filterwarnings("ignore")


def get_model_classes(model):
    """Return the class labels of ``model`` as a list.

    Prefers ``model.classes_``, then ``model.classes``. When neither
    attribute exists, a copy of ``DEFAULT_LABELS`` is returned so callers
    cannot mutate the module-level default by accident.
    """
    if hasattr(model, "classes_"):
        return list(model.classes_)
    if hasattr(model, "classes"):
        return list(model.classes)
    return list(DEFAULT_LABELS)


# NOTE: in this copy of the file the header of predict_one is split across a
# line break; its opening fragment is preserved verbatim below and the rest
# of the definition continues on the next line.
# def predict_one(text, model_choice="Logistic 
Regression"): text_clean = clean_text(text) if not text_clean: return { "label": "neutral", "confidence": 0.0, "html": "No text provided", "error": None } if tfidf is None: return {"label": None, "confidence": 0.0, "html": "", "error": "Vectorizer (tfidf) not found. Upload tfidf_vectorizer.pkl to models/."} X = tfidf.transform([text_clean]) try: if model_choice == "Logistic Regression" and logreg is not None: probs = logreg.predict_proba(X)[0] classes = get_model_classes(logreg) elif model_choice == "LightGBM" and lgbm is not None: try: probs = lgbm.predict_proba(X)[0] except Exception: probs = lgbm.predict_proba(X.toarray())[0] classes = get_model_classes(lgbm) else: if logreg is not None: probs = logreg.predict_proba(X)[0]; classes = get_model_classes(logreg) elif lgbm is not None: try: probs = lgbm.predict_proba(X)[0] except: probs = lgbm.predict_proba(X.toarray())[0] classes = get_model_classes(lgbm) else: return {"label": None, "confidence": 0.0, "html": "", "error": "No model found. Upload logreg_model.pkl or lgbm_model.pkl to models/."} except Exception as e: return {"label": None, "confidence": 0.0, "html": "", "error": f"Prediction error: {e}"} idx = int(np.argmax(probs)) label = classes[idx] confidence = float(probs[idx]) colors = { 'positive': '#16a34a', 'neutral': '#f59e0b', 'negative': '#ef4444' } bars_html = "" for c, p in zip(classes, probs): col = colors.get(str(c).lower(), "#3b82f6") pct = float(p) * 100.0 bars_html += f"""
models/ as described in README.md.models/ or rename files appropriately.")
def run_and_format(text, model_choice):
res = predict_one(text, model_choice)
if res.get("error"):
return f"**Error:** {res['error']}", "", gr.update(value=f"models/