Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,76 +1,123 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
#
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
with gr.Blocks() as demo:
|
| 52 |
-
gr.Markdown(
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
user_input = gr.Textbox(label="Enter your text", placeholder="Type in English, Urdu, or Roman Urdu...")
|
| 58 |
-
|
| 59 |
-
analyze_btn = gr.Button("Analyze Sentiment")
|
| 60 |
-
|
| 61 |
-
with gr.Row():
|
| 62 |
-
sentiment_output = gr.Textbox(label="Sentiment")
|
| 63 |
-
confidence_output = gr.Textbox(label="Confidence Score")
|
| 64 |
-
polarity_output = gr.Textbox(label="Polarity")
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
| 72 |
|
|
|
|
| 73 |
|
| 74 |
-
# Run app
|
| 75 |
if __name__ == "__main__":
|
| 76 |
-
demo.launch()
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
-------------------------------------------------------------
• Uses Hugging Face model: nlptown/bert-base-multilingual-uncased-sentiment (5-star output)
• Maps 5-star probabilities to 3 classes:
    Negative = P(1★) + P(2★)
    Neutral  = P(3★)
    Positive = P(4★) + P(5★)
• Saves each query to sentiment_logs.xlsx (downloadable)
|
| 10 |
+
"""
|
| 11 |
|
| 12 |
+
import os
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
import pandas as pd
|
| 15 |
+
import gradio as gr
|
| 16 |
+
from transformers import pipeline
|
| 17 |
+
|
| 18 |
+
# -------- Model & Pipeline --------
# Hugging Face checkpoint whose classifier head emits five labels
# ("1 star" ... "5 stars").
# NOTE(review): the public model card describes fine-tuning on product reviews
# in six European languages; Urdu / Roman Urdu quality is presumably
# best-effort via the multilingual base model - confirm before relying on it.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

# Loaded once at import time so every request reuses the same pipeline object.
clf = pipeline("sentiment-analysis", model=MODEL_NAME)

# -------- Logging setup --------
# Workbook that accumulates one row per analysed query.
LOG_PATH = "sentiment_logs.xlsx"

# On first start there is no workbook yet: write one containing only the
# header row so later reads and the download link always find a file.
if not os.path.exists(LOG_PATH):
    _empty_log = pd.DataFrame(
        columns=[
            "timestamp",
            "text",
            "predicted_label_3class",
            "confidence_3class",
            "stars_probs",
            "top_star_label",
        ]
    )
    _empty_log.to_excel(LOG_PATH, index=False)
|
| 30 |
+
|
| 31 |
+
def _aggregate_to_3class(star_scores):
|
| 32 |
+
"""
|
| 33 |
+
star_scores: list of dicts like:
|
| 34 |
+
[{'label': '1 star', 'score': 0.05}, ..., {'label': '5 stars', 'score': 0.6}]
|
| 35 |
+
Returns: (pred_label, confidence, probs_dict, top_star_label)
|
| 36 |
+
"""
|
| 37 |
+
# Normalize keys (some labels are singular/plural)
|
| 38 |
+
scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
|
| 39 |
+
s1 = scores.get("1 star", 0.0)
|
| 40 |
+
s2 = scores.get("2 stars", 0.0)
|
| 41 |
+
s3 = scores.get("3 stars", 0.0)
|
| 42 |
+
s4 = scores.get("4 stars", 0.0)
|
| 43 |
+
s5 = scores.get("5 stars", 0.0)
|
| 44 |
+
|
| 45 |
+
neg = s1 + s2
|
| 46 |
+
neu = s3
|
| 47 |
+
pos = s4 + s5
|
| 48 |
+
|
| 49 |
+
probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
|
| 50 |
+
pred_label = max(probs3, key=probs3.get)
|
| 51 |
+
confidence = probs3[pred_label]
|
| 52 |
+
|
| 53 |
+
# Top star label for reference
|
| 54 |
+
top_star_label = max(
|
| 55 |
+
["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
|
| 56 |
+
key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
return pred_label, confidence, probs3, top_star_label
|
| 60 |
+
|
| 61 |
+
def analyze(text):
    """Classify *text* into three sentiment classes and log the query to Excel.

    Args:
        text: Raw user input from the UI textbox.

    Returns:
        A 4-tuple ``(sentiment, confidence, polarity, log_path)``. The first
        three are display strings and the last is ``LOG_PATH`` for the
        download component. For blank input the tuple is a prompt message,
        two empty strings and ``LOG_PATH``, and nothing is logged.
    """
    # Local import: the file header only imports ``datetime`` from this module.
    from datetime import timezone

    if not text or not text.strip():
        return "⚠ Please enter some text.", "", "", LOG_PATH

    # top_k=None asks for every class score (the non-deprecated spelling of
    # return_all_scores=True). truncation=True keeps inputs longer than the
    # model's window from raising inside the pipeline.
    raw = clf(text, top_k=None, truncation=True)
    # Depending on the transformers version, a single string yields either a
    # flat list of dicts or that list wrapped in an outer list.
    star_results = raw[0] if raw and isinstance(raw[0], list) else raw
    # top_k returns scores sorted by value; restore label order so the logged
    # "stars_probs" column always reads 1 star ... 5 stars.
    star_results = sorted(star_results, key=lambda d: d["label"])

    pred_label, conf, _, top_star = _aggregate_to_3class(star_results)

    polarity = {
        "Positive": "😊 Positive",
        "Neutral": "😐 Neutral",
        "Negative": "☹️ Negative",
    }[pred_label]

    # Log to Excel: read the existing workbook, append one row, rewrite it.
    try:
        df = pd.read_excel(LOG_PATH)
    except Exception:
        # Deliberate best-effort: a missing or unreadable workbook must not
        # block the prediction, so start again from an empty log.
        df = pd.DataFrame(columns=[
            "timestamp", "text", "predicted_label_3class", "confidence_3class",
            "stars_probs", "top_star_label",
        ])

    new_row = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the rendered string is unchanged.
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        "text": text,
        "predicted_label_3class": pred_label,
        "confidence_3class": round(conf, 4),
        "stars_probs": str({d["label"]: round(float(d["score"]), 4) for d in star_results}),
        "top_star_label": top_star,
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    df.to_excel(LOG_PATH, index=False)

    # Display strings for the three textboxes, plus the log path for download.
    return (
        f"Sentiment: {pred_label}",
        f"Confidence: {conf:.3f}",  # aggregated class score, 0..1
        f"Polarity: {polarity}",
        LOG_PATH,
    )
|
| 103 |
+
|
| 104 |
+
# -------- Gradio UI --------
# Single-column layout: input box, trigger button, three read-only result
# boxes and a download link for the accumulated log workbook.
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🌐 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
        # Two trailing spaces force a Markdown line break before "Model:".
        "**Languages:** English, Urdu, Roman Urdu  \n"
        "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
    )

    user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
    btn = gr.Button("Analyze")

    out_sent = gr.Textbox(label="Sentiment")
    out_conf = gr.Textbox(label="Confidence (0–1)")
    out_pol = gr.Textbox(label="Polarity")
    out_file = gr.File(label="Download logs (.xlsx)")

    # analyze() returns four values, mapped positionally onto these outputs.
    btn.click(analyze, inputs=user_text, outputs=[out_sent, out_conf, out_pol, out_file])

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()
|