tahamueed23 committed on
Commit
82a1819
·
verified ·
1 Parent(s): dd037e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -46
app.py CHANGED
@@ -2,66 +2,118 @@ import gradio as gr
2
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
  import pandas as pd
4
  import os
 
5
 
6
  # -----------------------------
7
  # Load Models
8
  # -----------------------------
9
# English sentiment classifier (CardiffNLP).
en_model = pipeline(
    task="sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment",
)

# Urdu / Roman Urdu fine-tuned classifier (replace the repo ID below with
# your own Hugging Face repo ID). The tokenizer and the model weights are
# loaded explicitly from that repo and then wrapped in a pipeline.
ur_model_name = "tahamueed23/roman-urdu-sentiment"
ur_tokenizer = AutoTokenizer.from_pretrained(ur_model_name)
ur_model = AutoModelForSequenceClassification.from_pretrained(ur_model_name)
ur_pipeline = pipeline(
    task="sentiment-analysis",
    model=ur_model,
    tokenizer=ur_tokenizer,
)
 
 
 
 
 
17
 
18
  # -----------------------------
19
  # CSV Setup
20
  # -----------------------------
21
# Path of the CSV file every analysed sentence is logged to.
csv_file = "sentiment_logs.csv"

# On first run the log does not exist yet: write a header-only CSV so that
# later reads of the file find the expected columns.
if not os.path.exists(csv_file):
    df = pd.DataFrame(
        columns=[
            "Sentence",
            "Language",
            "Sentiment",
        ]
    )
    df.to_csv(csv_file, index=False)
25
 
26
  # -----------------------------
27
- # Processing Function
28
  # -----------------------------
29
def analyze_sentiment(sentence, language):
    """Classify *sentence* and append the outcome to the CSV log.

    Args:
        sentence: Text to classify. ``None``, empty and whitespace-only
            values are rejected before any model is invoked.
        language: Language selected in the UI. ``"English"`` is routed to
            ``en_model``; every other value is routed to ``ur_pipeline``.

    Returns:
        A Markdown string with the selected language, the predicted label
        and the confidence rounded to three decimals, or a string starting
        with ``"⚠️ Error:"`` when the input is empty or processing fails.
    """
    # Do not run the model on (or log) an empty submission.
    if not sentence or not sentence.strip():
        return "⚠️ Error: Please enter a sentence."

    try:
        classifier = en_model if language == "English" else ur_pipeline
        result = classifier(sentence)[0]

        label = result["label"]
        score = round(result["score"], 3)

        # Append only the new row instead of re-reading and rewriting the
        # whole log on every request; the header is written only when the
        # file is missing or empty.
        new_row = pd.DataFrame(
            [[sentence, language, f"{label} ({score})"]],
            columns=["Sentence", "Language", "Sentiment"],
        )
        needs_header = (
            not os.path.exists(csv_file) or os.path.getsize(csv_file) == 0
        )
        new_row.to_csv(csv_file, mode="a", header=needs_header, index=False)

        return f"**Language Selected:** {language}\n**Sentiment:** {label}\n**Confidence:** {score}"

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the
        # request handler.
        return f"⚠️ Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- # -----------------------------
54
- # Gradio App
55
- # -----------------------------
# Build the UI: two Markdown headers, a text input, a language selector, a
# Markdown result area and a button that runs analyze_sentiment.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🌍 Multilingual Sentiment Analysis (English, Urdu, Roman Urdu)")
    gr.Markdown(value="Enter a sentence and select the language to detect sentiment.")

    input_text = gr.Textbox(label="Enter your sentence:")
    lang_dropdown = gr.Dropdown(
        choices=["English", "Urdu", "Roman Urdu"],
        label="Select Language",
    )
    output_text = gr.Markdown(label="Result")

    btn = gr.Button(value="Analyze Sentiment")
    btn.click(
        fn=analyze_sentiment,
        inputs=[input_text, lang_dropdown],
        outputs=output_text,
    )

# Start the app as soon as the module is executed.
demo.launch()
 
 
2
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
  import pandas as pd
4
  import os
5
+ import re
6
 
7
  # -----------------------------
8
  # Load Models
9
  # -----------------------------
10
# English sentiment pipeline (model ID: cardiffnlp/twitter-roberta-base-sentiment-latest).
english_model = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
)

# Replace with your own fine-tuned models
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/urdu-roman-urdu-sentiment",
)

# Urdu and Roman Urdu are served by the same checkpoint, so reuse the
# pipeline that is already loaded rather than loading a second copy of the
# same weights. If a dedicated Roman Urdu model becomes available, replace
# this alias with its own pipeline(...) call.
roman_urdu_model = urdu_model
25
 
26
  # -----------------------------
27
  # CSV Setup
28
  # -----------------------------
29
# Path of the CSV log that is also offered to the user as a download.
SAVE_FILE = "sentiment_logs.csv"

# On first run the log does not exist yet: write a header-only CSV so that
# later reads of the file find the expected columns.
if not os.path.exists(SAVE_FILE):
    df = pd.DataFrame(
        columns=[
            "Sentence",
            "Language",
            "Sentiment",
            "Confidence",
        ]
    )
    df.to_csv(SAVE_FILE, index=False)
33
 
34
  # -----------------------------
35
+ # Language Detection (simple rule-based)
36
  # -----------------------------
37
# Any letter of the Arabic script. A range is used instead of a hand-picked
# character list so that Urdu-specific letters (e.g. پ چ ٹ ڈ ڑ ژ گ ں ھ ے)
# are recognised too. Compiled once at import time.
_URDU_SCRIPT_RE = re.compile(r"[\u0621-\u064A\u0671-\u06D3\u06D5]")

# Frequent Roman Urdu words, matched as whole words on lower-cased text.
_ROMAN_URDU_RE = re.compile(
    r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad|pyar|dil|insaan)\b"
)


def detect_language(text):
    """Guess the language of *text* with simple rules.

    Args:
        text: The string to inspect.

    Returns:
        ``"Urdu"`` if the text contains at least one Arabic-script letter,
        ``"Roman Urdu"`` if it contains one of the known Roman Urdu keywords
        as a whole word (case-insensitive), otherwise ``"English"``.
    """
    if _URDU_SCRIPT_RE.search(text):
        return "Urdu"
    if _ROMAN_URDU_RE.search(text.lower()):
        return "Roman Urdu"
    return "English"
45
+
46
def normalize_label(label):
    """Map a raw classifier label to "Positive", "Negative" or "Neutral".

    The comparison is case-insensitive and substring based; "positive" is
    checked before "negative", and any label containing neither word is
    reported as "Neutral".
    """
    lowered = label.lower()
    for keyword, canonical in (("positive", "Positive"), ("negative", "Negative")):
        if keyword in lowered:
            return canonical
    return "Neutral"
54
+
55
def sentiment_with_tips(sentiment):
    """Return the emoji tip shown for a normalized sentiment.

    Recognised values are "Positive", "Negative" and "Neutral"; any other
    value yields an empty string.
    """
    tip_by_sentiment = {
        "Positive": "😊 Great! Keep spreading positivity.",
        "Negative": "😞 It seems negative. Try to focus on solutions.",
        "Neutral": "😐 Neutral statement. Could go either way.",
    }
    if sentiment in tip_by_sentiment:
        return tip_by_sentiment[sentiment]
    return ""
62
+
63
def analyze_sentiment(text, lang_hint):
    """Classify *text*, log the result to the CSV file and return UI values.

    Args:
        text: Text to classify. ``None``, empty and whitespace-only values
            are rejected before any model is invoked.
        lang_hint: Language chosen in the UI. ``"Auto Detect"`` (or no
            selection at all) defers to ``detect_language``.

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)`` where
        the first three items are strings and ``log_path`` is ``SAVE_FILE``.
        For empty input the tuple carries a warning message and two empty
        strings.
    """
    # Guard against a missing value as well as blank text.
    if not text or not text.strip():
        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

    # A cleared dropdown yields no hint; treat it like "Auto Detect" rather
    # than silently falling through to the Roman Urdu model.
    auto_detect = lang_hint is None or lang_hint == "Auto Detect"
    lang = detect_language(text) if auto_detect else lang_hint

    # Select model
    if lang == "English":
        classifier = english_model
    elif lang == "Urdu":
        classifier = urdu_model
    else:
        classifier = roman_urdu_model
    result = classifier(text)[0]

    # Process results
    sentiment = normalize_label(result["label"])
    score = round(result["score"], 3)
    explanation = sentiment_with_tips(sentiment)

    # Append only the new row instead of re-reading and rewriting the whole
    # log on every request; the header is written only when the file is
    # missing or empty.
    new_row = pd.DataFrame(
        [[text, lang, sentiment, score]],
        columns=["Sentence", "Language", "Sentiment", "Confidence"],
    )
    needs_header = (
        not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
    )
    new_row.to_csv(SAVE_FILE, mode="a", header=needs_header, index=False)

    return sentiment, f"{score}", explanation, SAVE_FILE
91
 
 
 
 
# Build the UI: an intro header, an input column (text, language, button)
# and an output column (sentiment, confidence, explanation, log download).
with gr.Blocks() as demo:
    gr.Markdown(
        value=(
            "## 🌍 Multilingual Sentiment Analysis (English Urdu Roman Urdu)\n"
            "Detect sentiment as **Positive, Neutral, or Negative** with confidence score.\n\n"
            "📌 Features:\n"
            "- Choose language (or Auto Detect)\n"
            "- Download all results as CSV\n"
            "- Emojis + Tips for better understanding 🎯"
        )
    )

    with gr.Row():
        # Left column: user input.
        with gr.Column():
            user_text = gr.Textbox(
                label="✍️ Enter text",
                placeholder="Type in English, Urdu, or Roman Urdu...",
            )
            lang_dropdown = gr.Dropdown(
                choices=["Auto Detect", "English", "Urdu", "Roman Urdu"],
                label="🌐 Language",
                value="Auto Detect",
            )
            btn = gr.Button(value="🔍 Analyze")

        # Right column: analysis results and the downloadable log.
        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

    # The four return values of analyze_sentiment map onto the four outputs
    # in order.
    btn.click(
        fn=analyze_sentiment,
        inputs=[user_text, lang_dropdown],
        outputs=[out_sent, out_conf, out_exp, out_file],
    )

# Launch only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()