tahamueed23 committed on
Commit
fd36e32
·
verified ·
1 Parent(s): 55d0499

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +471 -40
app.py CHANGED
@@ -1,50 +1,481 @@
1
  import gradio as gr
2
- import pandas as pd
3
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- # Load models
6
- urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
7
- roman_urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
8
- english_model = pipeline("sentiment-analysis", model="tahamueed23/sentiment_roberta_english_finetuned")
9
-
10
- # DataFrame to store results
11
- results_df = pd.DataFrame(columns=["Sentence", "Sentiment"])
12
-
13
- def analyze_sentiment(sentence):
14
- global results_df
15
- if any(unicode.isdigit() for unicode in sentence):
16
- return "Invalid input. Please enter a valid sentence without numbers."
17
-
18
- if all('\u0600' <= char <= '\u06FF' for char in sentence): # Check if Urdu
19
- model = urdu_model
20
- elif all('a' <= char <= 'z' or 'A' <= char <= 'Z' for char in sentence): # Check if English
21
- model = english_model
22
- elif any(char.isalpha() for char in sentence): # Check if Roman Urdu
23
- model = roman_urdu_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  else:
25
- return "Unsupported language."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- sentiment = model(sentence)[0]
28
- results_df = results_df.append({"Sentence": sentence, "Sentiment": sentiment['label']}, ignore_index=True)
29
- return sentiment['label']
30
 
31
- def save_to_csv():
32
- if results_df.empty:
33
- return "No results to save."
34
- results_df.to_csv("sentiment_analysis_results.csv", index=False)
35
- return "Results saved to sentiment_analysis_results.csv."
36
 
37
- # Create Gradio Interface
38
- with gr.Blocks() as demo:
39
- gr.Markdown("# Sentiment Analysis Tool")
40
- input_text = gr.TextArea(label="Enter your sentence here:", placeholder="Type sentence...")
41
- analyze_button = gr.Button("Analyze")
42
- sentiment_output = gr.Label(label="Sentiment Output")
 
43
 
44
- analyze_button.click(fn=analyze_sentiment, inputs=input_text, outputs=sentiment_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- save_button = gr.Button("Save to CSV")
47
- save_output = gr.Label(label="Save Output")
48
- save_button.click(fn=save_to_csv, outputs=save_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  from transformers import pipeline
3
+ import pandas as pd
4
+ import os
5
+ import re
6
+ from datetime import datetime
7
+ from filelock import FileLock
8
+ import unicodedata
9
+
10
+ # ==========================================
11
+ # MODEL LOADING
12
+ # ==========================================
13
+ print("🔄 Loading models...")
14
+
15
+ try:
16
+ # Load sentiment models
17
+ english_model = pipeline(
18
+ "sentiment-analysis",
19
+ model="tahamueed23/sentiment_roberta_english_finetuned"
20
+ )
21
+
22
+ # Same model for both Urdu and Roman Urdu as per your requirements
23
+ urdu_roman_model = pipeline(
24
+ "sentiment-analysis",
25
+ model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
26
+ )
27
+
28
+ print("✅ All models loaded successfully!")
29
+
30
+ except Exception as e:
31
+ print(f"❌ Error loading models: {e}")
32
+ raise
33
 
34
+ # ==========================================
35
+ # LANGUAGE DETECTION
36
+ # ==========================================
37
+
38
+ def contains_urdu_script(text):
39
+ """Check if text contains Urdu/Arabic script"""
40
+ urdu_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFF]')
41
+ return bool(urdu_pattern.search(text))
42
+
43
+ def is_roman_urdu(text):
44
+ """Detect Roman Urdu using comprehensive word patterns"""
45
+ text_lower = text.lower().strip()
46
+
47
+ # Roman Urdu specific words
48
+ roman_urdu_markers = {
49
+ # Common verbs and helping verbs
50
+ 'hai', 'hain', 'tha', 'thi', 'thay', 'ho', 'hun', 'hoon', 'hein', 'he', 'hy',
51
+ # Pronouns
52
+ 'main', 'mein', 'mai', 'tum', 'wo', 'woh', 'ye', 'yeh', 'ap', 'aap',
53
+ # Prepositions
54
+ 'ka', 'ki', 'ke', 'ko', 'se', 'ne', 'par', 'pe',
55
+ # Common words
56
+ 'nahi', 'nhi', 'nahin', 'kya', 'kyun', 'kaise', 'kese', 'kahan', 'kab',
57
+ # Sentiment words
58
+ 'acha', 'achy', 'achha', 'accha', 'achi', 'bura', 'kharab', 'behtar',
59
+ 'zabardast', 'bekar', 'bekaar', 'bohot', 'bohat', 'bahut', 'bhot',
60
+ # Action words
61
+ 'karo', 'karna', 'karna', 'karein', 'kiya', 'kia', 'gaya', 'gayi', 'gaye',
62
+ 'dena', 'lena', 'dekho', 'dekha', 'suno', 'suna', 'samjho', 'samjha',
63
+ # Conjunctions
64
+ 'aur', 'or', 'lekin', 'magar', 'ya', 'phir', 'to', 'toh',
65
+ # Time words
66
+ 'ab', 'abhi', 'kal', 'parso', 'aj', 'aaj',
67
+ # Common expressions
68
+ 'sath', 'saath', 'pas', 'paas', 'dur', 'door', 'sab', 'kuch', 'koi'
69
+ }
70
+
71
+ # Tokenize text
72
+ words = re.findall(r'\b\w+\b', text_lower)
73
+
74
+ if not words:
75
+ return False
76
+
77
+ # Count Roman Urdu markers
78
+ marker_count = sum(1 for word in words if word in roman_urdu_markers)
79
+ marker_ratio = marker_count / len(words)
80
+
81
+ # Detection thresholds
82
+ if len(words) <= 3:
83
+ # For very short text, need at least one marker
84
+ return marker_count >= 1
85
+ elif len(words) <= 8:
86
+ # For short text, need 25% markers
87
+ return marker_ratio >= 0.25
88
+ else:
89
+ # For longer text, need 20% markers
90
+ return marker_ratio >= 0.20
91
+
92
+ def detect_language(text):
93
+ """
94
+ Detect language with high accuracy
95
+ Returns: 'English', 'Urdu', or 'Roman Urdu'
96
+ """
97
+ if not text or not text.strip():
98
+ return "English"
99
+
100
+ text = text.strip()
101
+
102
+ # Check for Urdu script (most reliable)
103
+ if contains_urdu_script(text):
104
+ return "Urdu"
105
+
106
+ # Check for Roman Urdu patterns
107
+ if is_roman_urdu(text):
108
+ return "Roman Urdu"
109
+
110
+ # Default to English
111
+ return "English"
112
+
113
+ # ==========================================
114
+ # SENTIMENT ANALYSIS
115
+ # ==========================================
116
+
117
+ def normalize_label(label):
118
+ """Normalize sentiment labels from different models"""
119
+ label_lower = str(label).lower()
120
+
121
+ if 'pos' in label_lower or 'positive' in label_lower:
122
+ return "Positive"
123
+ elif 'neg' in label_lower or 'negative' in label_lower:
124
+ return "Negative"
125
+ elif 'neu' in label_lower or 'neutral' in label_lower:
126
+ return "Neutral"
127
  else:
128
+ return label
129
+
130
+ def get_sentiment_emoji(sentiment):
131
+ """Return emoji for sentiment"""
132
+ emoji_map = {
133
+ "Positive": "😊",
134
+ "Negative": "😞",
135
+ "Neutral": "😐"
136
+ }
137
+ return emoji_map.get(sentiment, "")
138
+
139
+ def analyze_sentiment(text, language):
140
+ """
141
+ Perform sentiment analysis based on detected language
142
+ """
143
+ try:
144
+ # Truncate text if too long
145
+ text_input = text[:512]
146
+
147
+ # Choose model based on language
148
+ if language == "English":
149
+ result = english_model(text_input)[0]
150
+ else: # Urdu or Roman Urdu
151
+ result = urdu_roman_model(text_input)[0]
152
+
153
+ # Extract and normalize results
154
+ sentiment = normalize_label(result['label'])
155
+ confidence = round(float(result['score']), 4)
156
+
157
+ return sentiment, confidence
158
+
159
+ except Exception as e:
160
+ print(f"Error in sentiment analysis: {e}")
161
+ return "Error", 0.0
162
 
163
+ # ==========================================
164
+ # CSV LOGGING
165
+ # ==========================================
166
 
167
+ CSV_FILE = "sentiment_analysis_logs.csv"
168
+ LOCK_FILE = CSV_FILE + ".lock"
 
 
 
169
 
170
+ def initialize_csv():
171
+ """Initialize CSV file if it doesn't exist"""
172
+ if not os.path.exists(CSV_FILE):
173
+ df = pd.DataFrame(columns=[
174
+ "Timestamp", "Text", "Language", "Sentiment", "Confidence"
175
+ ])
176
+ df.to_csv(CSV_FILE, index=False, encoding='utf-8-sig')
177
 
178
+ def save_to_csv(text, language, sentiment, confidence):
179
+ """Save analysis result to CSV with file locking"""
180
+ try:
181
+ with FileLock(LOCK_FILE, timeout=10):
182
+ # Read existing data
183
+ if os.path.exists(CSV_FILE):
184
+ df = pd.read_csv(CSV_FILE, encoding='utf-8-sig')
185
+ else:
186
+ df = pd.DataFrame(columns=[
187
+ "Timestamp", "Text", "Language", "Sentiment", "Confidence"
188
+ ])
189
+
190
+ # Add new row
191
+ new_row = pd.DataFrame([{
192
+ "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
193
+ "Text": text,
194
+ "Language": language,
195
+ "Sentiment": sentiment,
196
+ "Confidence": confidence
197
+ }])
198
+
199
+ df = pd.concat([df, new_row], ignore_index=True)
200
+
201
+ # Save to CSV
202
+ df.to_csv(CSV_FILE, index=False, encoding='utf-8-sig')
203
+
204
+ return True
205
+ except Exception as e:
206
+ print(f"Error saving to CSV: {e}")
207
+ return False
208
 
209
+ def load_logs():
210
+ """Load recent logs from CSV"""
211
+ try:
212
+ if os.path.exists(CSV_FILE):
213
+ df = pd.read_csv(CSV_FILE, encoding='utf-8-sig')
214
+ # Return last 50 entries, most recent first
215
+ return df.tail(50).iloc[::-1]
216
+ else:
217
+ return pd.DataFrame(columns=[
218
+ "Timestamp", "Text", "Language", "Sentiment", "Confidence"
219
+ ])
220
+ except Exception as e:
221
+ print(f"Error loading logs: {e}")
222
+ return pd.DataFrame(columns=[
223
+ "Timestamp", "Text", "Language", "Sentiment", "Confidence"
224
+ ])
225
+
226
+ def clear_logs():
227
+ """Clear all logs"""
228
+ try:
229
+ if os.path.exists(CSV_FILE):
230
+ os.remove(CSV_FILE)
231
+ initialize_csv()
232
+ return pd.DataFrame(columns=[
233
+ "Timestamp", "Text", "Language", "Sentiment", "Confidence"
234
+ ])
235
+ except Exception as e:
236
+ print(f"Error clearing logs: {e}")
237
+ return load_logs()
238
+
239
+ # ==========================================
240
+ # MAIN ANALYSIS FUNCTION
241
+ # ==========================================
242
+
243
+ def process_sentiment(text):
244
+ """
245
+ Main function to process sentiment analysis
246
+ """
247
+ if not text or not text.strip():
248
+ return (
249
+ "",
250
+ "",
251
+ "",
252
+ "",
253
+ load_logs(),
254
+ CSV_FILE
255
+ )
256
+
257
+ # Detect language
258
+ language = detect_language(text)
259
+
260
+ # Analyze sentiment
261
+ sentiment, confidence = analyze_sentiment(text, language)
262
+
263
+ # Format results
264
+ emoji = get_sentiment_emoji(sentiment)
265
+ result_text = f"{emoji} {sentiment}"
266
+ confidence_text = f"{confidence:.2%}"
267
+
268
+ # Create detailed result
269
+ detail = f"**Language:** {language}\n**Sentiment:** {sentiment}\n**Confidence:** {confidence:.4f}"
270
+
271
+ # Save to CSV
272
+ save_to_csv(text, language, sentiment, confidence)
273
+
274
+ # Load updated logs
275
+ logs = load_logs()
276
+
277
+ return (
278
+ result_text,
279
+ confidence_text,
280
+ language,
281
+ detail,
282
+ logs,
283
+ CSV_FILE
284
+ )
285
+
286
+ # ==========================================
287
+ # GRADIO INTERFACE
288
+ # ==========================================
289
+
290
+ # Initialize CSV on startup
291
+ initialize_csv()
292
+
293
+ # Custom CSS for better styling
294
+ custom_css = """
295
+ .container {
296
+ max-width: 1400px;
297
+ margin: auto;
298
+ }
299
+ .header {
300
+ text-align: center;
301
+ padding: 20px;
302
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
303
+ color: white;
304
+ border-radius: 10px;
305
+ margin-bottom: 20px;
306
+ }
307
+ .result-box {
308
+ font-size: 24px;
309
+ font-weight: bold;
310
+ text-align: center;
311
+ padding: 20px;
312
+ border-radius: 10px;
313
+ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
314
+ color: white;
315
+ }
316
+ .confidence-box {
317
+ font-size: 20px;
318
+ text-align: center;
319
+ padding: 15px;
320
+ border-radius: 10px;
321
+ background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
322
+ color: white;
323
+ }
324
+ .stats-box {
325
+ padding: 15px;
326
+ border-radius: 8px;
327
+ background: #f8f9fa;
328
+ border: 1px solid #dee2e6;
329
+ }
330
+ """
331
+
332
+ # Create Gradio interface
333
+ with gr.Blocks(css=custom_css, title="Sentiment Analysis - Student Feedback") as demo:
334
+
335
+ # Header
336
+ gr.HTML("""
337
+ <div class="header">
338
+ <h1>🎓 Student Feedback Sentiment Analysis</h1>
339
+ <p style="font-size: 18px; margin-top: 10px;">
340
+ Multilingual Support: English • اردو • Roman Urdu
341
+ </p>
342
+ </div>
343
+ """)
344
+
345
+ # Main content
346
+ with gr.Row():
347
+ # Left column - Input
348
+ with gr.Column(scale=1):
349
+ gr.Markdown("### 📝 Enter Feedback")
350
+
351
+ input_text = gr.Textbox(
352
+ label="Student Feedback",
353
+ placeholder="Enter feedback in English, Urdu, or Roman Urdu...\nPress Enter or click Analyze",
354
+ lines=5,
355
+ max_lines=10
356
+ )
357
+
358
+ with gr.Row():
359
+ analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", scale=2)
360
+ clear_btn = gr.Button("🗑️ Clear Logs", variant="secondary", scale=1)
361
+
362
+ # Right column - Results
363
+ with gr.Column(scale=1):
364
+ gr.Markdown("### 📊 Analysis Results")
365
+
366
+ with gr.Row():
367
+ sentiment_output = gr.Textbox(
368
+ label="Sentiment",
369
+ interactive=False,
370
+ elem_classes="result-box"
371
+ )
372
+ confidence_output = gr.Textbox(
373
+ label="Confidence",
374
+ interactive=False,
375
+ elem_classes="confidence-box"
376
+ )
377
+
378
+ language_output = gr.Textbox(
379
+ label="Detected Language",
380
+ interactive=False
381
+ )
382
+
383
+ detail_output = gr.Markdown(
384
+ label="Details",
385
+ value=""
386
+ )
387
+
388
+ # Bottom section - Logs and Export
389
+ gr.Markdown("---")
390
+ gr.Markdown("### 📋 Analysis History")
391
+
392
+ with gr.Row():
393
+ logs_display = gr.Dataframe(
394
+ headers=["Timestamp", "Text", "Language", "Sentiment", "Confidence"],
395
+ datatype=["str", "str", "str", "str", "number"],
396
+ label="Recent Analyses",
397
+ wrap=True,
398
+ interactive=False,
399
+ value=load_logs()
400
+ )
401
+
402
+ with gr.Row():
403
+ export_file = gr.File(
404
+ label="📥 Download Complete Logs (CSV)",
405
+ value=CSV_FILE,
406
+ interactive=False
407
+ )
408
+ gr.Markdown("""
409
+ **💡 Tips:**
410
+ - Type your feedback and press **Enter** or click **Analyze**
411
+ - Supports English, Urdu (اردو), and Roman Urdu
412
+ - All analyses are automatically saved
413
+ - Download CSV for complete history
414
+ """)
415
+
416
+ # Model information
417
+ gr.Markdown("---")
418
+ with gr.Accordion("ℹ️ Model Information", open=False):
419
+ gr.Markdown("""
420
+ **Models Used:**
421
+ - **English:** tahamueed23/sentiment_roberta_english_finetuned
422
+ - **Urdu & Roman Urdu:** tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu
423
+
424
+ **Features:**
425
+ - Automatic language detection
426
+ - High-accuracy sentiment classification
427
+ - Real-time analysis
428
+ - CSV export for data analysis
429
+ - Support for mixed feedback in different languages
430
+ """)
431
+
432
+ # Event handlers
433
+ def process_and_update(text):
434
+ return process_sentiment(text)
435
+
436
+ # Click event
437
+ analyze_btn.click(
438
+ fn=process_and_update,
439
+ inputs=[input_text],
440
+ outputs=[
441
+ sentiment_output,
442
+ confidence_output,
443
+ language_output,
444
+ detail_output,
445
+ logs_display,
446
+ export_file
447
+ ]
448
+ )
449
+
450
+ # Enter key event
451
+ input_text.submit(
452
+ fn=process_and_update,
453
+ inputs=[input_text],
454
+ outputs=[
455
+ sentiment_output,
456
+ confidence_output,
457
+ language_output,
458
+ detail_output,
459
+ logs_display,
460
+ export_file
461
+ ]
462
+ )
463
+
464
+ # Clear logs event
465
+ clear_btn.click(
466
+ fn=clear_logs,
467
+ inputs=[],
468
+ outputs=[logs_display]
469
+ )
470
 
471
+ # Launch the app
472
+ if __name__ == "__main__":
473
+ print("\n" + "="*50)
474
+ print("🚀 Starting Sentiment Analysis Application")
475
+ print("="*50 + "\n")
476
+ demo.launch(
477
+ share=False,
478
+ show_error=True,
479
+ server_name="0.0.0.0",
480
+ server_port=7860
481
+ )