mbalvi committed on
Commit
133afa1
·
verified ·
1 Parent(s): 860d714

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -69
app.py CHANGED
@@ -1,76 +1,123 @@
1
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import pandas as pd
import os

# Load multilingual model (English, Urdu, Roman Urdu)
MODEL_NAME = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

# Use slow tokenizer (avoid SentencePiece fast conversion error)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Single shared pipeline instance, built once at import time.
sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# File to store logs
OUTPUT_FILE = "sentiment_logs.xlsx"

# Initialize Excel if not exists (header row only; one row is appended per query)
if not os.path.exists(OUTPUT_FILE):
    df = pd.DataFrame(columns=["User_Text", "Label", "Score"])
    df.to_excel(OUTPUT_FILE, index=False)
22
def analyze_sentiment(user_text):
    """Analyze sentiment in English, Urdu, or Roman Urdu.

    Returns four values for the UI: a sentiment line, a confidence line,
    a polarity line, and the path of the Excel log file.
    """
    # Guard: reject blank or whitespace-only input.
    if not user_text.strip():
        return "❌ Please enter some text.", "", "", OUTPUT_FILE

    # Single forward pass through the HF pipeline (top prediction only).
    prediction = sentiment_pipeline(user_text)[0]
    label = prediction["label"]
    score = round(prediction["score"], 3)

    # Human-readable polarity (emoji) derived from the raw model label.
    upper = label.upper()
    if "NEGATIVE" in upper:
        polarity = "☹️ Negative"
    elif "POSITIVE" in upper:
        polarity = "😊 Positive"
    else:
        polarity = "😐 Neutral"

    # Append this query to the running Excel log.
    log = pd.read_excel(OUTPUT_FILE)
    row = pd.DataFrame([[user_text, label, score]],
                       columns=["User_Text", "Label", "Score"])
    pd.concat([log, row], ignore_index=True).to_excel(OUTPUT_FILE, index=False)

    return f"Sentiment: {label}", f"Confidence: {score}", f"Polarity: {polarity}", OUTPUT_FILE
48
-
49
-
50
# Gradio UI
# Layout: one text input, an analyze button, three read-only result boxes,
# and a file widget exposing the Excel log for download.
with gr.Blocks() as demo:
    gr.Markdown("## 🌍 Multilingual Sentiment Analysis App\n"
                "Supports **English, Urdu, Roman Urdu**\n"
                "Classifies into **Positive, Negative, Neutral**")

    with gr.Row():
        user_input = gr.Textbox(label="Enter your text", placeholder="Type in English, Urdu, or Roman Urdu...")

    analyze_btn = gr.Button("Analyze Sentiment")

    with gr.Row():
        sentiment_output = gr.Textbox(label="Sentiment")
        confidence_output = gr.Textbox(label="Confidence Score")
        polarity_output = gr.Textbox(label="Polarity")

    # Downloadable Excel file
    download_file = gr.File(label="Download Logs (.xlsx)")

    # Wire the button; analyze_sentiment returns (sentiment, confidence, polarity, file path).
    analyze_btn.click(fn=analyze_sentiment,
                      inputs=user_input,
                      outputs=[sentiment_output, confidence_output, polarity_output, download_file])


# Run app
if __name__ == "__main__":
    demo.launch()
 
1
+ """
2
+ Multilingual Sentiment Analysis (English β€’ Urdu β€’ Roman Urdu)
3
+ -------------------------------------------------------------
4
+ β€’ Uses Hugging Face model: nlptown/bert-base-multilingual-uncased-sentiment (5-star output)
5
+ β€’ Maps 5-star probabilities to 3 classes:
6
+ Negative = P(1β˜…) + P(2β˜…)
7
+ Neutral = P(3β˜…)
8
+ Positive = P(4β˜…) + P(5β˜…)
9
+ β€’ Saves each query to sentiment_logs.xlsx (downloadable)
10
+ """
11
 
12
+ import os
13
+ from datetime import datetime
14
+ import pandas as pd
15
+ import gradio as gr
16
+ from transformers import pipeline
17
+
18
+ # -------- Model & Pipeline --------
19
+ # This model supports many languages (incl. English/Urdu/Roman Urdu)
20
+ MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
21
+ clf = pipeline("sentiment-analysis", model=MODEL_NAME)
22
+
23
+ # -------- Logging setup --------
24
+ LOG_PATH = "sentiment_logs.xlsx"
25
+ if not os.path.exists(LOG_PATH):
26
+ pd.DataFrame(columns=[
27
+ "timestamp", "text", "predicted_label_3class", "confidence_3class",
28
+ "stars_probs", "top_star_label"
29
+ ]).to_excel(LOG_PATH, index=False)
30
+
31
+ def _aggregate_to_3class(star_scores):
32
+ """
33
+ star_scores: list of dicts like:
34
+ [{'label': '1 star', 'score': 0.05}, ..., {'label': '5 stars', 'score': 0.6}]
35
+ Returns: (pred_label, confidence, probs_dict, top_star_label)
36
+ """
37
+ # Normalize keys (some labels are singular/plural)
38
+ scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
39
+ s1 = scores.get("1 star", 0.0)
40
+ s2 = scores.get("2 stars", 0.0)
41
+ s3 = scores.get("3 stars", 0.0)
42
+ s4 = scores.get("4 stars", 0.0)
43
+ s5 = scores.get("5 stars", 0.0)
44
+
45
+ neg = s1 + s2
46
+ neu = s3
47
+ pos = s4 + s5
48
+
49
+ probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
50
+ pred_label = max(probs3, key=probs3.get)
51
+ confidence = probs3[pred_label]
52
+
53
+ # Top star label for reference
54
+ top_star_label = max(
55
+ ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
56
+ key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
57
+ )
58
+
59
+ return pred_label, confidence, probs3, top_star_label
60
+
61
def analyze(text):
    """Classify *text* as Positive/Neutral/Negative, append the query to the
    Excel log, and return display strings for the UI.

    Returns a 4-tuple: (sentiment line, confidence line, polarity line, log path).
    """
    if not text or not text.strip():
        return "❌ Please enter some text.", "", "", LOG_PATH

    # Ask the pipeline for all 5 star-class scores (needed to aggregate).
    # `top_k=None` replaces the deprecated `return_all_scores=True`; depending
    # on the transformers version the result may or may not be nested one
    # level for a single input, so unwrap defensively.
    raw = clf(text, top_k=None)
    star_results = raw[0] if raw and isinstance(raw[0], list) else raw

    pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)

    polarity = {
        "Positive": "😊 Positive",
        "Neutral": "😐 Neutral",
        "Negative": "☹️ Negative",
    }[pred_label]

    # Log to Excel (best effort: recreate the frame if the file is missing/corrupt)
    try:
        df = pd.read_excel(LOG_PATH)
    except Exception:
        df = pd.DataFrame(columns=[
            "timestamp", "text", "predicted_label_3class", "confidence_3class",
            "stars_probs", "top_star_label"
        ])

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is identical to the old output.
    from datetime import timezone
    new_row = {
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        "text": text,
        "predicted_label_3class": pred_label,
        "confidence_3class": round(conf, 4),
        "stars_probs": str({d["label"]: round(float(d["score"]), 4) for d in star_results}),
        "top_star_label": top_star,
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    df.to_excel(LOG_PATH, index=False)

    # Display nicely
    return (
        f"Sentiment: {pred_label}",
        f"Confidence: {conf:.3f}",  # 0..1
        f"Polarity: {polarity}",
        LOG_PATH,
    )
103
+
104
# -------- Gradio UI --------
# Repairs mojibake in the user-facing strings (β€’/β˜…/β†’/– were
# mis-encoded bullets, stars, arrow, and en-dash).
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
        "**Languages:** English, Urdu, Roman Urdu \n"
        "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
    )

    user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
    btn = gr.Button("Analyze")

    out_sent = gr.Textbox(label="Sentiment")
    out_conf = gr.Textbox(label="Confidence (0–1)")
    out_pol = gr.Textbox(label="Polarity")
    out_file = gr.File(label="Download logs (.xlsx)")

    # analyze returns (sentiment, confidence, polarity, log file path).
    btn.click(analyze, inputs=user_text, outputs=[out_sent, out_conf, out_pol, out_file])


if __name__ == "__main__":
    demo.launch()