mbalvi committed on
Commit
133afa1
·
verified ·
1 Parent(s): 860d714

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -69
app.py CHANGED
@@ -1,76 +1,123 @@
1
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import pandas as pd
import os

# Load multilingual model (English, Urdu, Roman Urdu)
MODEL_NAME = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

# Use slow tokenizer (avoid SentencePiece fast conversion error)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Single shared pipeline instance, built once at import time.
sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# File to store logs
OUTPUT_FILE = "sentiment_logs.xlsx"

# Initialize Excel if not exists (header row only; one row is appended per query)
if not os.path.exists(OUTPUT_FILE):
    df = pd.DataFrame(columns=["User_Text", "Label", "Score"])
    df.to_excel(OUTPUT_FILE, index=False)
22
def analyze_sentiment(user_text):
    """Analyze sentiment in English, Urdu, or Roman Urdu.

    Returns four values for the UI: a sentiment line, a confidence line,
    a polarity line, and the path of the Excel log file.
    """
    # Guard: reject blank or whitespace-only input.
    if not user_text.strip():
        return "❌ Please enter some text.", "", "", OUTPUT_FILE

    # Single forward pass through the HF pipeline (top prediction only).
    prediction = sentiment_pipeline(user_text)[0]
    label = prediction["label"]
    score = round(prediction["score"], 3)

    # Human-readable polarity (emoji) derived from the raw model label.
    upper = label.upper()
    if "NEGATIVE" in upper:
        polarity = "☹️ Negative"
    elif "POSITIVE" in upper:
        polarity = "😊 Positive"
    else:
        polarity = "😐 Neutral"

    # Append this query to the running Excel log.
    log = pd.read_excel(OUTPUT_FILE)
    row = pd.DataFrame([[user_text, label, score]],
                       columns=["User_Text", "Label", "Score"])
    pd.concat([log, row], ignore_index=True).to_excel(OUTPUT_FILE, index=False)

    return f"Sentiment: {label}", f"Confidence: {score}", f"Polarity: {polarity}", OUTPUT_FILE
48
-
49
-
50
# Gradio UI
# Layout: one text input, an analyze button, three read-only result boxes,
# and a file widget exposing the Excel log for download.
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Multilingual Sentiment Analysis App\n"
                "Supports **English, Urdu, Roman Urdu**\n"
                "Classifies into **Positive, Negative, Neutral**")

    with gr.Row():
        user_input = gr.Textbox(label="Enter your text", placeholder="Type in English, Urdu, or Roman Urdu...")

    analyze_btn = gr.Button("Analyze Sentiment")

    with gr.Row():
        sentiment_output = gr.Textbox(label="Sentiment")
        confidence_output = gr.Textbox(label="Confidence Score")
        polarity_output = gr.Textbox(label="Polarity")

    # Downloadable Excel file
    download_file = gr.File(label="Download Logs (.xlsx)")

    # Wire the button; analyze_sentiment returns (sentiment, confidence, polarity, file path).
    analyze_btn.click(fn=analyze_sentiment,
                      inputs=user_input,
                      outputs=[sentiment_output, confidence_output, polarity_output, download_file])


# Run app
if __name__ == "__main__":
    demo.launch()
 
1
+ """
2
+ Multilingual Sentiment Analysis (English β€’ Urdu β€’ Roman Urdu)
3
+ -------------------------------------------------------------
4
+ β€’ Uses Hugging Face model: nlptown/bert-base-multilingual-uncased-sentiment (5-star output)
5
+ β€’ Maps 5-star probabilities to 3 classes:
6
+ Negative = P(1β˜…) + P(2β˜…)
7
+ Neutral = P(3β˜…)
8
+ Positive = P(4β˜…) + P(5β˜…)
9
+ β€’ Saves each query to sentiment_logs.xlsx (downloadable)
10
+ """
11
 
12
+ import os
13
+ from datetime import datetime
14
+ import pandas as pd
15
+ import gradio as gr
16
+ from transformers import pipeline
17
+
18
+ # -------- Model & Pipeline --------
19
+ # This model supports many languages (incl. English/Urdu/Roman Urdu)
20
+ MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
21
+ clf = pipeline("sentiment-analysis", model=MODEL_NAME)
22
+
23
+ # -------- Logging setup --------
24
+ LOG_PATH = "sentiment_logs.xlsx"
25
+ if not os.path.exists(LOG_PATH):
26
+ pd.DataFrame(columns=[
27
+ "timestamp", "text", "predicted_label_3class", "confidence_3class",
28
+ "stars_probs", "top_star_label"
29
+ ]).to_excel(LOG_PATH, index=False)
30
+
31
+ def _aggregate_to_3class(star_scores):
32
+ """
33
+ star_scores: list of dicts like:
34
+ [{'label': '1 star', 'score': 0.05}, ..., {'label': '5 stars', 'score': 0.6}]
35
+ Returns: (pred_label, confidence, probs_dict, top_star_label)
36
+ """
37
+ # Normalize keys (some labels are singular/plural)
38
+ scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
39
+ s1 = scores.get("1 star", 0.0)
40
+ s2 = scores.get("2 stars", 0.0)
41
+ s3 = scores.get("3 stars", 0.0)
42
+ s4 = scores.get("4 stars", 0.0)
43
+ s5 = scores.get("5 stars", 0.0)
44
+
45
+ neg = s1 + s2
46
+ neu = s3
47
+ pos = s4 + s5
48
+
49
+ probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
50
+ pred_label = max(probs3, key=probs3.get)
51
+ confidence = probs3[pred_label]
52
+
53
+ # Top star label for reference
54
+ top_star_label = max(
55
+ ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
56
+ key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
57
+ )
58
+
59
+ return pred_label, confidence, probs3, top_star_label
60
+
61
def analyze(text):
    """Classify *text* as Positive/Neutral/Negative, append the query to the
    Excel log, and return display strings for the UI.

    Returns a 4-tuple: (sentiment line, confidence line, polarity line, log path).
    """
    if not text or not text.strip():
        return "❌ Please enter some text.", "", "", LOG_PATH

    # Ask the pipeline for all 5 star-class scores (needed to aggregate).
    # `top_k=None` replaces the deprecated `return_all_scores=True`; depending
    # on the transformers version the result may or may not be nested one
    # level for a single input, so unwrap defensively.
    raw = clf(text, top_k=None)
    star_results = raw[0] if raw and isinstance(raw[0], list) else raw

    pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)

    polarity = {
        "Positive": "😊 Positive",
        "Neutral": "😐 Neutral",
        "Negative": "☹️ Negative",
    }[pred_label]

    # Log to Excel (best effort: recreate the frame if the file is missing/corrupt)
    try:
        df = pd.read_excel(LOG_PATH)
    except Exception:
        df = pd.DataFrame(columns=[
            "timestamp", "text", "predicted_label_3class", "confidence_3class",
            "stars_probs", "top_star_label"
        ])

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is identical to the old output.
    from datetime import timezone
    new_row = {
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        "text": text,
        "predicted_label_3class": pred_label,
        "confidence_3class": round(conf, 4),
        "stars_probs": str({d["label"]: round(float(d["score"]), 4) for d in star_results}),
        "top_star_label": top_star,
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    df.to_excel(LOG_PATH, index=False)

    # Display nicely
    return (
        f"Sentiment: {pred_label}",
        f"Confidence: {conf:.3f}",  # 0..1
        f"Polarity: {polarity}",
        LOG_PATH,
    )
103
+
104
# -------- Gradio UI --------
# Repairs mojibake in the user-facing strings (β€’/β˜…/β†’/– were
# mis-encoded bullets, stars, arrow, and en-dash).
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
        "**Languages:** English, Urdu, Roman Urdu \n"
        "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
    )

    user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
    btn = gr.Button("Analyze")

    out_sent = gr.Textbox(label="Sentiment")
    out_conf = gr.Textbox(label="Confidence (0–1)")
    out_pol = gr.Textbox(label="Polarity")
    out_file = gr.File(label="Download logs (.xlsx)")

    # analyze returns (sentiment, confidence, polarity, log file path).
    btn.click(analyze, inputs=user_text, outputs=[out_sent, out_conf, out_pol, out_file])


if __name__ == "__main__":
    demo.launch()