Spaces:

jeevitha-app
/

Sentiment_analyzer

Sleeping

App Files Community

jeevitha-app committed on Oct 21, 2025

Commit

5cf7d88

verified ·

1 Parent(s): dca10b4

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -69

app.py CHANGED Viewed

@@ -1,83 +1,89 @@
 import gradio as gr
 import joblib
-import re
 import numpy as np
-# ==========================================================
-# 🔹 1️⃣ Load Models and Vectorizers
-# ==========================================================
-english_model = joblib.load("logistic_regression_english.pkl")
-english_vectorizer = joblib.load("tfidf_vectorizer_english.pkl")
-persian_model = joblib.load("logistic_regression_persian.pkl")
-persian_vectorizer = joblib.load("tfidf_vectorizer_persian.pkl")
-# Label mapping from training
-label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
-# ==========================================================
-# 🔹 2️⃣ Preprocessing (must match training exactly)
-# ==========================================================
-def clean_english_text(text):
-    text = text.lower()
-    text = re.sub(r"http\S+|www\S+|https\S+", "", text)
-    text = re.sub(r"[^a-zA-Z\s]", "", text)
-    text = re.sub(r"\s+", " ", text).strip()
-    return text
-def clean_persian_text(text):
-    text = re.sub(r"[^\u0600-\u06FF\s]", "", text)  # keep only Persian chars
-    text = re.sub(r"\s+", " ", text).strip()
-    return text
-# ==========================================================
-# 🔹 3️⃣ Prediction Function
-# ==========================================================
-def predict_sentiment(text, language):
     if not text.strip():
-        return "⚠ Please enter some text."
-    if language == "English":
-        cleaned = clean_english_text(text)
-        vec = english_vectorizer.transform([cleaned])
-        probs = english_model.predict_proba(vec)[0]
-        pred = np.argmax(probs)
-        return f"Prediction: {label_map[pred]} ({probs[pred]:.2f} confidence)"
-    elif language == "Persian":
-        cleaned = clean_persian_text(text)
-        vec = persian_vectorizer.transform([cleaned])
-        probs = persian_model.predict_proba(vec)[0]
-        pred = np.argmax(probs)
-        return f"Prediction: {label_map[pred]} ({probs[pred]:.2f} confidence)"
     else:
-        return "❌ Invalid language option selected."
-# ==========================================================
-# 🔹 4️⃣ Debug Info (Optional - Check Vocabulary Size)
-# ==========================================================
-print(f"✅ English vectorizer vocabulary size: {len(english_vectorizer.get_feature_names_out())}")
-print(f"✅ Persian vectorizer vocabulary size: {len(persian_vectorizer.get_feature_names_out())}")
-# ==========================================================
-# 🔹 5️⃣ Gradio Interface
-# ==========================================================
-iface = gr.Interface(
-    fn=predict_sentiment,
-    inputs=[
-        gr.Textbox(lines=3, label="Enter Text"),
-        gr.Radio(["English", "Persian"], label="Select Language", value="English")
-    ],
-    outputs=gr.Textbox(label="Predicted Sentiment"),
-    title="🌍 Multilingual Sentiment Classifier (English & Persian)",
-    description="Choose your language and get sentiment prediction with confidence score.",
-    examples=[
-        ["This movie was amazing!", "English"],
-        ["The worst experience ever", "English"],
-        ["این فیلم خیلی بد بود", "Persian"],
-        ["من این محصول را دوست دارم", "Persian"]
-    ]
-)
-iface.launch()

+# =====================================================
+# 🌐 Multi-lingual Sentiment Analyzer (English + Persian)
+# =====================================================
 import gradio as gr
 import joblib
 import numpy as np
+import shap
+import os
+# -----------------------------------------------------
+# ✅ Load Models and Vectorizers
+# -----------------------------------------------------
+MODEL_DIR = "models"
+eng_model_path = os.path.join(MODEL_DIR, "logistic_regression_english.pkl")
+eng_vectorizer_path = os.path.join(MODEL_DIR, "tfidf_vectorizer_english.pkl")
+per_model_path = os.path.join(MODEL_DIR, "logistic_regression_persian.pkl")
+per_vectorizer_path = os.path.join(MODEL_DIR, "tfidf_vectorizer_persian.pkl")
+eng_model = joblib.load(eng_model_path)
+eng_vectorizer = joblib.load(eng_vectorizer_path)
+per_model = joblib.load(per_model_path)
+per_vectorizer = joblib.load(per_vectorizer_path)
+# -----------------------------------------------------
+# ✅ Label mapping
+# -----------------------------------------------------
+label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
+# -----------------------------------------------------
+# ✅ Prediction Function
+# -----------------------------------------------------
+def predict_sentiment(text, lang):
     if not text.strip():
+        return "⚠️ Please enter some text.", "", "", ""
+    # Select appropriate model and vectorizer
+    if lang == "English":
+        vectorizer = eng_vectorizer
+        model = eng_model
     else:
+        vectorizer = per_vectorizer
+        model = per_model
+    # Vectorize text
+    X = vectorizer.transform([text])
+    pred = model.predict(X)[0]
+    probs = model.predict_proba(X)[0]
+    conf = np.max(probs)
+    sentiment = label_map.get(pred, "Unknown")
+    # SHAP explanation
+    try:
+        explainer = shap.LinearExplainer(model, X, feature_dependence="independent")
+        shap_values = explainer.shap_values(X)
+        shap_html = shap.plots.text(explainer, X, display=False)
+    except Exception:
+        shap_html = "<p>⚠️ No explanation available for this input.</p>"
+    return f"Prediction: {sentiment}", f"Confidence: {conf:.3f}", shap_html, ""
+# -----------------------------------------------------
+# ✅ Build Gradio Interface
+# -----------------------------------------------------
+with gr.Blocks(theme=gr.themes.Soft()) as app:
+    gr.Markdown("<h2 style='text-align:center;'>🌍 Multi-lingual Sentiment (English + Persian)</h2>")
+    with gr.Row():
+        comment = gr.Textbox(label="Comment", placeholder="Type your comment here...")
+        lang = gr.Radio(["English", "Persian"], label="Language", value="English")
+    predict_btn = gr.Button("Predict", variant="primary")
+    output1 = gr.Textbox(label="Prediction")
+    output2 = gr.Textbox(label="Confidence")
+    output3 = gr.HTML(label="Explanation")
+    predict_btn.click(
+        predict_sentiment,
+        inputs=[comment, lang],
+        outputs=[output1, output2, output3, gr.Textbox(visible=False)]
+    )
+# -----------------------------------------------------
+# ✅ Launch App
+# -----------------------------------------------------
+if __name__ == "__main__":
+    app.launch()