tahamueed23 committed on
Commit
55d0499
·
verified ·
1 Parent(s): f864a10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -531
app.py CHANGED
@@ -1,540 +1,50 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
  import pandas as pd
4
- import os
5
- import re
6
- from filelock import FileLock
7
- import torch
8
-
9
- # -----------------------------
10
- # Load Models with Error Handling
11
- # -----------------------------
12
- try:
13
- # English sentiment model
14
- english_model = pipeline(
15
- "sentiment-analysis",
16
- model="siebert/sentiment-roberta-large-english"
17
- )
18
-
19
- # Urdu sentiment model
20
- urdu_model = pipeline(
21
- "sentiment-analysis",
22
- model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
23
- )
24
-
25
- # Roman Urdu sentiment model
26
- roman_urdu_model = pipeline(
27
- "sentiment-analysis",
28
- model="tahamueed23/roman-urdu-sentiment"
29
- )
30
-
31
- # Language detection model
32
- lang_detector = pipeline(
33
- "text-classification",
34
- model="papluca/xlm-roberta-base-language-detection"
35
- )
36
-
37
- print("✅ All models loaded successfully!")
38
-
39
- except Exception as e:
40
- print(f"❌ Error loading models: {e}")
41
- raise
42
-
43
- # -----------------------------
44
- # Roman Urdu Word Databases
45
- # -----------------------------
46
- ROMAN_URDU_POSITIVE_WORDS = {
47
- 'acha', 'achy', 'achay', 'achi', 'behtar', 'zabardast', 'shandaar', 'umdah', 'umda',
48
- 'behtareen', 'kamaal', 'lajawab', 'mazedar', 'khush', 'khushi', 'pasand', 'pasandida',
49
- 'pyaara', 'pyaari', 'dilchasp', 'mufeed', 'pursukoon', 'roshan', 'saaf', 'suthri',
50
- 'tareef', 'targheeb', 'madadgar', 'dostana', 'jawab', 'khoob', 'khoobsurat', 'heran',
51
- 'mast', 'rangeen', 'sundar', 'sohna', 'sohni', 'pyara', 'pyari', 'meetha', 'meethi',
52
- 'mitha', 'mithi', 'azhar', 'badtameez', 'accha', 'acchi', 'acche'
53
- }
54
-
55
- ROMAN_URDU_NEGATIVE_WORDS = {
56
- 'kharab', 'bura', 'ganda', 'sust', 'kamzor', 'mushkil', 'naqis', 'namukammal',
57
- 'mayus', 'nakara', 'bekaar', 'bemisi', 'bepanah', 'beparwah', 'behos', 'bekhauf',
58
- 'bekhudi', 'bekhabar', 'bekasoor', 'bekar', 'bemari', 'bezaar', 'badsurat', 'badtameez',
59
- 'kameena', 'nalaiq', 'nakara', 'ghatiya', 'bakwas', 'bewakoof', 'ahmaq', 'murda',
60
- 'zaleel', 'kambakht', 'laanat', 'harami', 'bad', 'worst', 'waste', 'rubbish'
61
- }
62
-
63
- ROMAN_URDU_NEUTRAL_WORDS = {
64
- 'hai', 'hain', 'tha', 'thi', 'ho', 'hun', 'hein', 'main', 'tum', 'wo', 'ye', 'unhon',
65
- 'inhon', 'sath', 'lekin', 'kyun', 'jaisa', 'waisa', 'jese', 'wese', 'phir', 'ab', 'toh',
66
- 'ka', 'ki', 'ke', 'ko', 'se', 'mein', 'par', 'aur', 'ya', 'kya', 'kuch', 'sab', 'apna'
67
- }
68
-
69
- # Compile regex patterns for faster matching
70
- roman_urdu_positive_pattern = re.compile(r'\b(' + '|'.join(ROMAN_URDU_POSITIVE_WORDS) + r')\b', re.IGNORECASE)
71
- roman_urdu_negative_pattern = re.compile(r'\b(' + '|'.join(ROMAN_URDU_NEGATIVE_WORDS) + r')\b', re.IGNORECASE)
72
-
73
- # -----------------------------
74
- # Enhanced Language Detection
75
- # -----------------------------
76
- def detect_language_advanced(text):
77
- """Advanced language detection using model + rules"""
78
- if not text.strip():
79
- return "English"
80
-
81
- text_clean = text.strip()
82
-
83
- # Step 1: Urdu script detection (most reliable)
84
- if re.search(r'[\u0600-\u06FF]', text_clean):
85
- return "Urdu"
86
-
87
- # Step 2: Use transformer model for language detection
88
- try:
89
- # Truncate very long texts to avoid model limits
90
- truncated_text = text_clean[:250]
91
- lang_result = lang_detector(truncated_text)[0]
92
- lang_label = lang_result['label'].upper()
93
- lang_score = lang_result['score']
94
-
95
- # Map model outputs to our language categories
96
- lang_map = {
97
- 'UR': 'Urdu',
98
- 'EN': 'English',
99
- 'Ro-Ur': 'English', # Hindi often mixed with Roman Urdu
100
- }
101
-
102
- detected_lang = lang_map.get(lang_label, 'English')
103
-
104
- # Step 3: For Urdu/English detection, apply Roman Urdu rules
105
- if detected_lang in ['Urdu', 'English']:
106
- if is_likely_roman_urdu(text_clean):
107
- return "Roman Urdu"
108
-
109
- return detected_lang
110
-
111
- except Exception as e:
112
- print(f"Language detection model error: {e}")
113
- # Fallback to rule-based detection
114
- return detect_language_fallback(text_clean)
115
-
116
- def is_likely_roman_urdu(text):
117
- """Check if text is likely Roman Urdu using comprehensive rules"""
118
- text_lower = text.lower()
119
-
120
- # Count Roman Urdu specific words
121
- positive_hits = len(roman_urdu_positive_pattern.findall(text_lower))
122
- negative_hits = len(roman_urdu_negative_pattern.findall(text_lower))
123
- total_hits = positive_hits + negative_hits
124
-
125
- # Count total words
126
- words = re.findall(r'\b\w+\b', text_lower)
127
- total_words = len(words)
128
-
129
- if total_words == 0:
130
- return False
131
-
132
- # Rule 1: High percentage of Roman Urdu words
133
- roman_urdu_ratio = total_hits / total_words
134
- if roman_urdu_ratio > 0.3: # 30% threshold
135
- return True
136
-
137
- # Rule 2: Specific Roman Urdu sentence structures
138
- roman_urdu_patterns = [
139
- r"^[a-z ]*(hai|hain|tha|thi|ho|hun|hein)[\s\.\!]*$",
140
- r"^[a-z ]*(main|tum|wo|ye|unhon|inhon)[a-z ]*(hun|hein|ho|hai)[a-z ]*$",
141
- r"^[a-z ]*(acha|bura|kharab|behtar|zabardast)[a-z ]*(hai|hain|tha)[a-z ]*$",
142
- r"^[a-z ]*(kyun|kese|kaise|kisne|kisliye)[a-z ]*\?$",
143
- r"^[a-z ]*(bohat|bahut|zyada|zyda)[a-z ]+(acha|bura|kharab|behtar)"
144
- ]
145
-
146
- for pattern in roman_urdu_patterns:
147
- if re.search(pattern, text_lower):
148
- return True
149
-
150
- # Rule 3: Presence of key Roman Urdu function words
151
- function_words = ['hai', 'hain', 'tha', 'thi', 'ka', 'ki', 'ke', 'ko', 'se', 'ne']
152
- function_word_count = sum(1 for word in words if word in function_words)
153
-
154
- if function_word_count >= 2 and total_words <= 8:
155
- return True
156
-
157
- return False
158
-
159
- def detect_language_fallback(text):
160
- """Rule-based fallback language detection"""
161
- text_lower = text.lower()
162
-
163
- # Urdu script check
164
- if re.search(r'[\u0600-\u06FF]', text):
165
- return "Urdu"
166
-
167
- # Roman Urdu detection
168
- if is_likely_roman_urdu(text):
169
- return "Roman Urdu"
170
-
171
- return "English"
172
-
173
- # -----------------------------
174
- # Roman Urdu Text Processing
175
- # -----------------------------
176
- def normalize_roman_urdu(text):
177
- """Normalize Roman Urdu text variations"""
178
- text = text.lower().strip()
179
-
180
- # Common Roman Urdu spelling variations
181
- variations = {
182
- r'\bhy\b': 'hai', r'\bh\b': 'hai', r'\bhe\b': 'hai',
183
- r'\bnhi\b': 'nahi', r'\bnai\b': 'nahi', r'\bna\b': 'nahi',
184
- r'\bboht\b': 'bohot', r'\bbhot\b': 'bohot', r'\bbahut\b': 'bohot',
185
- r'\bzyada\b': 'zyada', r'\bzada\b': 'zyada', r'\bzyda\b': 'zyada',
186
- r'\bacha\b': 'acha', r'\bachay\b': 'achay', r'\bacchi\b': 'achi',
187
- r'\bacche\b': 'achay', r'\bthy\b': 'thay', r'\bthi\b': 'thi',
188
- r'\btha\b': 'tha', r'\bmje\b': 'mujhe', r'\btuje\b': 'tujhe',
189
- r'\busi\b': 'ussi', r'\besi\b': 'essi', r'\bwohi\b': 'wohi',
190
- r'\bkisi\b': 'kisi', r'\bkuch\b': 'kuch', r'\bsab\b': 'sab',
191
- r'\bme\b': 'main', r'\bmai\b': 'main', r'\btu\b': 'tum',
192
- r'\buss\b': 'us', r'\biss\b': 'is'
193
- }
194
-
195
- for pattern, replacement in variations.items():
196
- text = re.sub(pattern, replacement, text)
197
-
198
- return text
199
-
200
- # -----------------------------
201
- # Roman Urdu Sentiment Correction
202
- # -----------------------------
203
- def correct_roman_urdu_sentiment(text, current_sentiment, current_score):
204
- """Apply Roman Urdu specific sentiment corrections"""
205
- text_lower = text.lower()
206
- normalized_text = normalize_roman_urdu(text_lower)
207
-
208
- # Count positive and negative words
209
- positive_matches = roman_urdu_positive_pattern.findall(normalized_text)
210
- negative_matches = roman_urdu_negative_pattern.findall(normalized_text)
211
-
212
- positive_count = len(positive_matches)
213
- negative_count = len(negative_matches)
214
-
215
- # Strong positive indicators
216
- strong_positive_indicators = ['acha', 'achy', 'achay', 'achi', 'zabardast', 'shandaar', 'kamaal']
217
- strong_negative_indicators = ['kharab', 'bura', 'ganda', 'bekaar', 'badtameez']
218
-
219
- # Rule 1: If text contains strong positive words but model says negative, correct it
220
- has_strong_positive = any(indicator in normalized_text for indicator in strong_positive_indicators)
221
- has_strong_negative = any(indicator in normalized_text for indicator in strong_negative_indicators)
222
-
223
- if has_strong_positive and current_sentiment == "Negative":
224
- return "Positive", max(current_score, 0.85)
225
-
226
- if has_strong_negative and current_sentiment == "Positive":
227
- return "Negative", max(current_score, 0.85)
228
-
229
- # Rule 2: Word count based correction
230
- if positive_count > negative_count and current_sentiment == "Negative":
231
- new_score = min(0.8 + (positive_count * 0.05), 0.95)
232
- return "Positive", new_score
233
-
234
- if negative_count > positive_count and current_sentiment == "Positive":
235
- new_score = min(0.8 + (negative_count * 0.05), 0.95)
236
- return "Negative", new_score
237
-
238
- # Rule 3: Mixed sentiments with clear majority
239
- total_sentiment_words = positive_count + negative_count
240
- if total_sentiment_words >= 2:
241
- positive_ratio = positive_count / total_sentiment_words
242
-
243
- if positive_ratio >= 0.7 and current_sentiment != "Positive":
244
- return "Positive", 0.8
245
- elif positive_ratio <= 0.3 and current_sentiment != "Negative":
246
- return "Negative", 0.8
247
-
248
- return current_sentiment, current_score
249
-
250
- # -----------------------------
251
- # Enhanced Ensemble for Roman Urdu
252
- # -----------------------------
253
- def ensemble_roman_urdu_sentiment(text):
254
- """Advanced ensemble method for Roman Urdu sentiment analysis"""
255
- normalized_text = normalize_roman_urdu(text)
256
-
257
- try:
258
- # Get predictions from both Roman Urdu and Urdu models
259
- ru_result = roman_urdu_model(normalized_text)[0]
260
- ur_result = urdu_model(normalized_text)[0]
261
-
262
- # Normalize labels
263
- ru_sentiment = normalize_sentiment_label(ru_result["label"])
264
- ur_sentiment = normalize_sentiment_label(ur_result["label"])
265
- ru_score = ru_result["score"]
266
- ur_score = ur_result["score"]
267
-
268
- # Apply Roman Urdu corrections to both results
269
- ru_sentiment_corrected, ru_score_corrected = correct_roman_urdu_sentiment(text, ru_sentiment, ru_score)
270
- ur_sentiment_corrected, ur_score_corrected = correct_roman_urdu_sentiment(text, ur_sentiment, ur_score)
271
-
272
- # If both models agree after correction
273
- if ru_sentiment_corrected == ur_sentiment_corrected:
274
- final_score = max(ru_score_corrected, ur_score_corrected)
275
- return {"label": ru_sentiment_corrected, "score": final_score}
276
-
277
- # Weighted voting with higher weight for Roman Urdu model
278
- ru_weight = ru_score_corrected * 1.6 # Higher weight for Roman Urdu model
279
- ur_weight = ur_score_corrected * 1.2
280
-
281
- if ru_weight > ur_weight:
282
- return {"label": ru_sentiment_corrected, "score": ru_score_corrected}
283
- else:
284
- return {"label": ur_sentiment_corrected, "score": ur_score_corrected}
285
-
286
- except Exception as e:
287
- print(f"Ensemble error: {e}")
288
- # Fallback to Roman Urdu model with correction
289
- try:
290
- result = roman_urdu_model(normalize_roman_urdu(text))[0]
291
- corrected_sentiment, corrected_score = correct_roman_urdu_sentiment(
292
- text, normalize_sentiment_label(result["label"]), result["score"]
293
- )
294
- return {"label": corrected_sentiment, "score": corrected_score}
295
- except:
296
- return {"label": "Neutral", "score": 0.5}
297
-
298
- # -----------------------------
299
- # Sentiment Analysis Core Functions
300
- # -----------------------------
301
- def normalize_sentiment_label(label):
302
- """Normalize sentiment labels from different models"""
303
- label = str(label).lower()
304
-
305
- if any(word in label for word in ["pos", "positive", "positive", "lab"]):
306
- return "Positive"
307
- elif any(word in label for word in ["neg", "negative", "negative"]):
308
- return "Negative"
309
  else:
310
- return "Neutral"
311
 
312
- def get_strong_sentiment_words(text, language):
313
- """Extract strong sentiment-bearing words"""
314
- text_lower = text.lower()
315
- strong_words = []
316
-
317
- if language == "Roman Urdu":
318
- # Use our Roman Urdu word databases
319
- positive_matches = roman_urdu_positive_pattern.findall(text_lower)
320
- negative_matches = roman_urdu_negative_pattern.findall(text_lower)
321
- strong_words = positive_matches + negative_matches
322
- elif language == "Urdu":
323
- # Urdu strong words (you can expand this list)
324
- urdu_positive = ['زبردست', 'شاندار', 'عمدہ', 'بہترین', 'اچھا']
325
- urdu_negative = ['خراب', 'برا', 'مایوس کن', 'بیکار']
326
- for word in urdu_positive + urdu_negative:
327
- if word in text:
328
- strong_words.append(word)
329
- else: # English
330
- english_positive = ['excellent', 'outstanding', 'amazing', 'wonderful', 'perfect', 'great']
331
- english_negative = ['terrible', 'awful', 'horrible', 'disappointing', 'poor', 'bad']
332
- for word in english_positive + english_negative:
333
- if re.search(r'\b' + re.escape(word) + r'\b', text_lower):
334
- strong_words.append(word)
335
-
336
- return list(set(strong_words))[:5] # Return unique words, max 5
337
 
338
- def generate_detailed_explanation(text, sentiment, score, language, strong_words):
339
- """Generate detailed explanation for sentiment analysis"""
340
-
341
- confidence_level = "High" if score >= 0.8 else "Medium" if score >= 0.6 else "Low"
342
-
343
- base_explanations = {
344
- "Positive": {
345
- "High": "Strong positive sentiment with clear positive expressions.",
346
- "Medium": "Moderately positive sentiment with favorable tone.",
347
- "Low": "Slightly positive leaning with some positive indicators."
348
- },
349
- "Negative": {
350
- "High": "Strong negative sentiment with clear criticism.",
351
- "Medium": "Moderately negative sentiment with critical tone.",
352
- "Low": "Slightly negative leaning with some concerning indicators."
353
- },
354
- "Neutral": {
355
- "High": "Clearly neutral or factual statement.",
356
- "Medium": "Mostly neutral with balanced perspective.",
357
- "Low": "Weak sentiment leaning neutral."
358
- }
359
- }
360
-
361
- explanation = base_explanations[sentiment][confidence_level]
362
-
363
- # Add language specific notes
364
- if language == "Roman Urdu":
365
- explanation += " Analyzed with Roman Urdu specific rules."
366
-
367
- # Special note for common corrections
368
- if any(word in text.lower() for word in ['acha', 'achy', 'achay', 'achi']):
369
- if sentiment == "Positive":
370
- explanation += " Words like 'acha' correctly identified as positive."
371
-
372
- # Add strong words information
373
- if strong_words:
374
- explanation += f" Key sentiment words: {', '.join(strong_words)}."
375
-
376
- explanation += f" Confidence: {score:.3f}"
377
-
378
- return explanation
379
-
380
- # -----------------------------
381
- # Main Analysis Function
382
- # -----------------------------
383
- SAVE_FILE = "sentiment_logs.csv"
384
- LOCK_FILE = SAVE_FILE + ".lock"
385
-
386
- if not os.path.exists(SAVE_FILE):
387
- pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"]).to_csv(
388
- SAVE_FILE, index=False, encoding="utf-8-sig"
389
- )
390
-
391
- def analyze_sentiment_complete(text, lang_hint):
392
- """Complete sentiment analysis pipeline"""
393
- if not text.strip():
394
- return "⚠️ Please enter a sentence.", "", "", SAVE_FILE, ""
395
-
396
- # Detect language
397
- language = lang_hint if lang_hint != "Auto Detect" else detect_language_advanced(text)
398
-
399
- try:
400
- # Perform sentiment analysis based on language
401
- if language == "English":
402
- result = english_model(text[:512])[0]
403
- sentiment = normalize_sentiment_label(result["label"])
404
- score = round(float(result["score"]), 3)
405
-
406
- elif language == "Urdu":
407
- result = urdu_model(text[:512])[0]
408
- sentiment = normalize_sentiment_label(result["label"])
409
- score = round(float(result["score"]), 3)
410
-
411
- else: # Roman Urdu
412
- result = ensemble_roman_urdu_sentiment(text)
413
- sentiment = result["label"]
414
- score = round(float(result["score"]), 3)
415
-
416
- # Get strong words
417
- strong_words = get_strong_sentiment_words(text, language)
418
- strong_words_str = ", ".join(strong_words) if strong_words else "None"
419
-
420
- # Generate explanation
421
- explanation = generate_detailed_explanation(text, sentiment, score, language, strong_words)
422
-
423
- # Save to CSV
424
- with FileLock(LOCK_FILE):
425
- df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig") if os.path.exists(SAVE_FILE) else pd.DataFrame(
426
- columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"]
427
- )
428
- new_row = pd.DataFrame([[
429
- text, language, sentiment, score, strong_words_str, pd.Timestamp.now()
430
- ]], columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
431
- df = pd.concat([df, new_row], ignore_index=True)
432
- df.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
433
-
434
- return sentiment, str(score), explanation, SAVE_FILE, strong_words_str
435
-
436
- except Exception as e:
437
- error_msg = f"Analysis error: {str(e)}"
438
- return "Error", "0", error_msg, SAVE_FILE, ""
439
-
440
- # -----------------------------
441
- # Gradio Interface
442
- # -----------------------------
443
- def show_logs():
444
- if os.path.exists(SAVE_FILE):
445
- df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
446
- return df.tail(20)
447
- else:
448
- return pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
449
 
450
- def clear_logs():
451
- if os.path.exists(SAVE_FILE):
452
- os.remove(SAVE_FILE)
453
- return pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
 
 
454
 
455
- with gr.Blocks(title="Multilingual Sentiment Analysis") as demo:
456
- gr.Markdown("""
457
- # 🌍 Advanced Multilingual Sentiment Analysis
458
- **English • Urdu • Roman Urdu**
459
 
460
- Uses transformer models for accurate language detection and sentiment analysis with specialized Roman Urdu handling.
461
-
462
- **Used models:**
463
- - English: siebert/sentiment-roberta-large-english
464
- - Urdu: tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu
465
- - Roman Urdu: tahamueed23/roman-urdu-sentiment
466
- - Language detection: papluca/xlm-roberta-base-language-detection
467
- """)
468
-
469
- # Top row with two columns
470
- with gr.Row():
471
- # Left column - Input section
472
- with gr.Column(scale=1):
473
- gr.Markdown("### 📥 Input Section")
474
- user_text = gr.Textbox(
475
- label="✍️ Enter Text",
476
- placeholder="Type in English, Urdu, or Roman Urdu...",
477
- lines=3
478
- )
479
- lang_dropdown = gr.Dropdown(
480
- ["Auto Detect", "English", "Urdu", "Roman Urdu"],
481
- value="Auto Detect",
482
- label="🌐 Language Selection"
483
- )
484
-
485
- with gr.Row():
486
- btn_analyze = gr.Button("🔍 Analyze Sentiment", variant="primary")
487
- btn_show = gr.Button("📂 Show Logs")
488
- btn_clear = gr.Button("🗑️ Clear Logs")
489
-
490
- # Right column - Results section
491
- with gr.Column(scale=1):
492
- gr.Markdown("### 📊 Results")
493
- with gr.Row():
494
- with gr.Column():
495
- out_sent = gr.Textbox(label="🎭 Sentiment")
496
- out_conf = gr.Textbox(label="📊 Confidence Score")
497
- with gr.Column():
498
- out_strong = gr.Textbox(label="💪 Strong Words")
499
- out_file = gr.File(label="⬇️ Download Logs")
500
-
501
- out_exp = gr.Textbox(label="💡 Detailed Explanation", lines=3)
502
-
503
- # Bottom row with analysis history taking most of the space
504
- with gr.Row():
505
- with gr.Column(scale=3): # Takes more space (75%)
506
- gr.Markdown("### 📋 Analysis History")
507
- logs_df = gr.Dataframe(
508
- headers=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"],
509
- label="",
510
- interactive=False,
511
- wrap=True
512
- )
513
- with gr.Column(scale=1): # Takes less space (25%)
514
- gr.Markdown("### ℹ️ Information")
515
- gr.Markdown("""
516
- **How to use:**
517
- 1. Enter text in any supported language
518
- 2. Select language or use Auto Detect
519
- 3. Click Analyze Sentiment
520
- 4. View results and history
521
-
522
- **Supported Languages:**
523
- - English
524
- - Urdu (Script)
525
- - Roman Urdu (Latin script)
526
-
527
- **Note:** Auto Detect works best with clear text samples.
528
- """)
529
-
530
- # Event handlers
531
- btn_analyze.click(
532
- analyze_sentiment_complete,
533
- inputs=[user_text, lang_dropdown],
534
- outputs=[out_sent, out_conf, out_exp, out_file, out_strong]
535
- )
536
- btn_show.click(show_logs, outputs=[logs_df])
537
- btn_clear.click(clear_logs, outputs=[logs_df])
538
 
539
- if __name__ == "__main__":
540
- demo.launch(share=False)
 
1
  import gradio as gr
 
2
  import pandas as pd
3
+ from transformers import pipeline
4
+
5
# Load the sentiment-analysis pipelines once at import time so that every
# request handled by the UI reuses them.
urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
# Roman Urdu uses the same checkpoint as Urdu script, so share the pipeline
# object instead of loading identical weights into memory a second time.
roman_urdu_model = urdu_model
english_model = pipeline("sentiment-analysis", model="tahamueed23/sentiment_roberta_english_finetuned")

# In-memory log of analysed sentences: one row per successful analysis.
# It lives only as long as the process; save_to_csv() writes it to disk.
results_df = pd.DataFrame(columns=["Sentence", "Sentiment"])
12
+
13
def analyze_sentiment(sentence):
    """Classify the sentiment of *sentence* and record it in ``results_df``.

    The model is picked from the script of the *letters* in the input;
    whitespace and punctuation are ignored so that ordinary multi-word
    sentences are routed correctly:

    * every letter in the Arabic block (U+0600-U+06FF) -> ``urdu_model``
    * every letter in ASCII ``a-z`` / ``A-Z``           -> ``english_model``
    * any other mix of letters                         -> ``roman_urdu_model``

    Args:
        sentence: Text entered by the user.

    Returns:
        The label predicted by the selected model, or a human-readable
        message when the input contains a digit or contains no letters.
    """
    global results_df

    # Inputs containing any digit are rejected outright.
    if any(char.isdigit() for char in sentence):
        return "Invalid input. Please enter a valid sentence without numbers."

    # Only letters carry script information. Testing every character would
    # make a single space or full stop defeat both the Urdu and English checks.
    letters = [char for char in sentence if char.isalpha()]
    if not letters:
        # Covers empty / whitespace-only / punctuation-only input, which must
        # never reach a model.
        return "Unsupported language."

    if all('\u0600' <= char <= '\u06FF' for char in letters):  # Urdu script
        model = urdu_model
    elif all('a' <= char <= 'z' or 'A' <= char <= 'Z' for char in letters):  # English
        # NOTE(review): Roman Urdu written purely in ASCII letters cannot be
        # told apart from English by script alone and is routed here too.
        model = english_model
    else:  # mixed scripts or other alphabets
        model = roman_urdu_model

    sentiment = model(sentence)[0]
    label = sentiment['label']

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # way to add a row.
    new_row = pd.DataFrame([{"Sentence": sentence, "Sentiment": label}])
    results_df = pd.concat([results_df, new_row], ignore_index=True)
    return label
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
def save_to_csv():
    """Write the accumulated results to ``sentiment_analysis_results.csv``.

    Returns:
        A status message: either a confirmation that the file was written,
        or a notice that there was nothing to save.
    """
    if not results_df.empty:
        # index=False keeps the row index out of the file.
        results_df.to_csv("sentiment_analysis_results.csv", index=False)
        return "Results saved to sentiment_analysis_results.csv."
    return "No results to save."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
# Create Gradio Interface
# Two independent actions are exposed: analysing a sentence and saving the
# results collected so far to a CSV file.
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Analysis Tool")
    input_text = gr.TextArea(label="Enter your sentence here:", placeholder="Type sentence...")
    analyze_button = gr.Button("Analyze")
    sentiment_output = gr.Label(label="Sentiment Output")

    # Clicking "Analyze" passes the text area's content to analyze_sentiment
    # and shows whatever string it returns (a label or an error message).
    analyze_button.click(fn=analyze_sentiment, inputs=input_text, outputs=sentiment_output)

    save_button = gr.Button("Save to CSV")
    save_output = gr.Label(label="Save Output")
    # save_to_csv takes no inputs; its status message is shown in save_output.
    save_button.click(fn=save_to_csv, outputs=save_output)

# Start the app when the module is executed.
demo.launch(debug=True)