Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import pandas as pd | |
| import os | |
| import re | |
| from datetime import datetime | |
| from filelock import FileLock | |
| import unicodedata | |
| # ========================================== | |
| # MODEL LOADING | |
| # ========================================== | |
print("🔄 Loading models...")
try:
    # Checkpoint used for text detected as English.
    english_model = pipeline(
        "sentiment-analysis",
        model="tahamueed23/sentiment_roberta_english_finetuned",
    )
    # Urdu script and Roman Urdu are served by one shared checkpoint.
    urdu_roman_model = pipeline(
        "sentiment-analysis",
        model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
    )
    # Alias rather than a second pipeline() call: the checkpoint is identical,
    # so loading it again would only double memory use and startup time.
    urdu_model = urdu_roman_model
    print("✅ All models loaded successfully!")
except Exception as e:
    # Startup boundary: report the failure, then re-raise so the app never
    # starts without its models.
    print(f"❌ Error loading models: {e}")
    raise
| # ========================================== | |
| # LANGUAGE DETECTION | |
| # ========================================== | |
# Arabic-script code point ranges used to recognise Urdu text:
#   U+0600-U+06FF  Arabic
#   U+0750-U+077F  Arabic Supplement
#   U+FB50-U+FDFF  Arabic Presentation Forms-A
#   U+FE70-U+FEFC  Arabic Presentation Forms-B
# The last range stops at U+FEFC on purpose: U+FEFF is the byte-order mark /
# zero-width no-break space, not a letter, and it would otherwise make
# BOM-prefixed English text look like Urdu.
_URDU_SCRIPT_RE = re.compile(
    r'[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFC]'
)


def contains_urdu_script(text):
    """Return True if *text* contains at least one Urdu/Arabic-script character.

    Args:
        text: The string to inspect.

    Returns:
        bool: True when any character falls in the Arabic-script ranges
        listed above, False otherwise (including for an empty string).
    """
    return bool(_URDU_SCRIPT_RE.search(text))
# Vocabulary that signals Roman Urdu (Urdu written in Latin letters).
# Built once at import time instead of on every call.
_ROMAN_URDU_MARKERS = frozenset({
    # Common verbs and helping verbs
    'hai', 'hain', 'tha', 'thi', 'thay', 'ho', 'hun', 'hoon', 'hein', 'he', 'hy',
    # Pronouns
    'main', 'mein', 'mai', 'tum', 'wo', 'woh', 'ye', 'yeh', 'ap', 'aap',
    # Prepositions
    'ka', 'ki', 'ke', 'ko', 'se', 'ne', 'par', 'pe',
    # Common words
    'nahi', 'nhi', 'nahin', 'kya', 'kyun', 'kaise', 'kese', 'kahan', 'kab',
    # Sentiment words
    'acha', 'achy', 'achha', 'accha', 'achi', 'bura', 'kharab', 'behtar',
    'zabardast', 'bekar', 'bekaar', 'bohot', 'bohat', 'bahut', 'bhot',
    # Action words
    'karo', 'karna', 'karein', 'kiya', 'kia', 'gaya', 'gayi', 'gaye',
    'dena', 'lena', 'dekho', 'dekha', 'suno', 'suna', 'samjho', 'samjha',
    # Conjunctions
    'aur', 'or', 'lekin', 'magar', 'ya', 'phir', 'to', 'toh',
    # Time words
    'ab', 'abhi', 'kal', 'parso', 'aj', 'aaj',
    # Common expressions
    'sath', 'saath', 'pas', 'paas', 'dur', 'door', 'sab', 'kuch', 'koi',
})

# Markers that are also everyday English words. On their own they prove
# nothing ("he is good", "I want to go"), so they only count once the text
# also contains at least one unambiguous marker.
_ENGLISH_LOOKALIKE_MARKERS = frozenset({'he', 'to', 'or', 'main', 'par', 'door'})


def is_roman_urdu(text):
    """Heuristically decide whether *text* is Roman Urdu.

    The text is lower-cased and split into word tokens, and tokens found in
    the marker vocabulary are counted. Text with no unambiguous marker is
    never treated as Roman Urdu. Otherwise the verdict depends on length:

    * up to 3 words: at least one marker;
    * 4 to 8 words: markers make up at least 25% of the words;
    * more than 8 words: markers make up at least 20% of the words.

    Args:
        text: The string to classify.

    Returns:
        bool: True if the text looks like Roman Urdu, False otherwise
        (including when the text contains no word tokens).
    """
    words = re.findall(r'\b\w+\b', text.lower())
    if not words:
        return False

    # Require real evidence before English look-alikes are allowed to count.
    has_unambiguous_marker = any(
        word in _ROMAN_URDU_MARKERS and word not in _ENGLISH_LOOKALIKE_MARKERS
        for word in words
    )
    if not has_unambiguous_marker:
        return False

    marker_count = sum(1 for word in words if word in _ROMAN_URDU_MARKERS)
    marker_ratio = marker_count / len(words)

    if len(words) <= 3:
        # Very short text: a single marker is enough.
        return marker_count >= 1
    if len(words) <= 8:
        # Short text: a quarter of the words must be markers.
        return marker_ratio >= 0.25
    # Longer text: a fifth of the words must be markers.
    return marker_ratio >= 0.20
def detect_language(text):
    """Classify *text* as 'English', 'Urdu', or 'Roman Urdu'.

    Empty, whitespace-only, or missing input is reported as 'English'.
    The Urdu-script check runs first; the Roman Urdu word heuristic is only
    consulted when no Urdu script is present, and 'English' is the fallback.

    Args:
        text: The feedback string to classify (may be empty or None).

    Returns:
        str: One of 'English', 'Urdu', or 'Roman Urdu'.
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return "English"

    # Script detection takes priority over the vocabulary heuristic.
    if contains_urdu_script(cleaned):
        return "Urdu"

    return "Roman Urdu" if is_roman_urdu(cleaned) else "English"
| # ========================================== | |
| # SENTIMENT ANALYSIS | |
| # ========================================== | |
def normalize_label(label):
    """Map a model's raw label onto 'Positive', 'Negative', or 'Neutral'.

    Matching is a case-insensitive substring test on ``str(label)``. The
    fragments are tried in the order pos, neg, neu and the first hit wins.

    Args:
        label: The raw label produced by a sentiment model.

    Returns:
        The canonical sentiment name, or *label* itself (unchanged) when no
        fragment matches.
    """
    lowered = str(label).lower()
    canonical_by_fragment = (
        ("pos", "Positive"),
        ("neg", "Negative"),
        ("neu", "Neutral"),
    )
    for fragment, canonical in canonical_by_fragment:
        if fragment in lowered:
            return canonical
    return label
def get_sentiment_emoji(sentiment):
    """Look up the emoji shown next to a sentiment name.

    Args:
        sentiment: 'Positive', 'Negative', or 'Neutral'.

    Returns:
        str: The matching emoji, or an empty string for any other value.
    """
    faces = dict(Positive="😊", Negative="😞", Neutral="😐")
    try:
        return faces[sentiment]
    except KeyError:
        # Unknown sentiment (e.g. "Error"): show no emoji.
        return ""
def analyze_sentiment(text, language):
    """Run the sentiment model that matches the detected language.

    Args:
        text: The feedback text to score.
        language: 'English' selects ``english_model``; any other value
            (Urdu or Roman Urdu) selects ``urdu_roman_model``.

    Returns:
        tuple: ``(sentiment, confidence)`` where *sentiment* is the label
        after ``normalize_label`` and *confidence* is the model score
        rounded to 4 decimals. Any failure is printed and reported as
        ``("Error", 0.0)``.
    """
    try:
        # Cheap character cap so very large pastes are not tokenized in full.
        text_input = text[:512]

        model = english_model if language == "English" else urdu_roman_model

        # A character cap does not bound the token count (emoji or non-Latin
        # text can expand to several tokens per character), so also truncate
        # at the tokenizer level instead of letting the model fail.
        # NOTE(review): max_length=512 assumes both checkpoints accept
        # 512-token inputs — confirm against their configs.
        result = model(text_input, truncation=True, max_length=512)[0]

        sentiment = normalize_label(result['label'])
        confidence = round(float(result['score']), 4)
        return sentiment, confidence
    except Exception as e:
        # Best-effort boundary: the UI shows "Error" rather than crashing.
        print(f"Error in sentiment analysis: {e}")
        return "Error", 0.0
| # ========================================== | |
| # CSV LOGGING | |
| # ========================================== | |
# Path of the analysis log, and of the lock file used to guard writes to it.
CSV_FILE = "sentiment_analysis_logs.csv"
LOCK_FILE = f"{CSV_FILE}.lock"
def initialize_csv():
    """Create the log CSV containing only its header row if it is missing.

    An existing file is left untouched.
    """
    if os.path.exists(CSV_FILE):
        return

    header_only = pd.DataFrame(
        columns=["Timestamp", "Text", "Language", "Sentiment", "Confidence"]
    )
    header_only.to_csv(CSV_FILE, index=False, encoding='utf-8-sig')
def save_to_csv(text, language, sentiment, confidence):
    """Append one analysis result to the log CSV under the file lock.

    The row is appended instead of re-reading and rewriting the whole file.
    This keeps each save cheap as the log grows and stops earlier rows from
    being re-parsed (and possibly altered) on every save.

    Args:
        text: The analysed feedback text.
        language: The detected language.
        sentiment: The normalized sentiment label.
        confidence: The model confidence score.

    Returns:
        bool: True if the row was written, False if anything failed (the
        error is printed).
    """
    columns = ["Timestamp", "Text", "Language", "Sentiment", "Confidence"]
    try:
        with FileLock(LOCK_FILE, timeout=10):
            new_row = pd.DataFrame(
                [{
                    "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    "Text": text,
                    "Language": language,
                    "Sentiment": sentiment,
                    "Confidence": confidence,
                }],
                columns=columns,
            )

            # A missing or zero-byte file needs the header (and the BOM).
            # NOTE(review): appending assumes an existing file already has
            # these columns in this order, as written by this module.
            starting_fresh = (
                not os.path.exists(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
            )
            new_row.to_csv(
                CSV_FILE,
                mode="w" if starting_fresh else "a",
                header=starting_fresh,
                index=False,
                # 'utf-8-sig' emits a BOM, which belongs only at the very
                # start of the file, so appended rows use plain UTF-8.
                encoding="utf-8-sig" if starting_fresh else "utf-8",
            )
        return True
    except Exception as e:
        print(f"Error saving to CSV: {e}")
        return False
def load_logs():
    """Return the 50 most recent log rows, newest first.

    Returns:
        pandas.DataFrame: The tail of the log CSV in reverse order, or an
        empty frame with the log columns when the file is missing or cannot
        be read (the error is printed).
    """
    def _empty_history():
        return pd.DataFrame(columns=[
            "Timestamp", "Text", "Language", "Sentiment", "Confidence"
        ])

    try:
        if not os.path.exists(CSV_FILE):
            return _empty_history()

        history = pd.read_csv(CSV_FILE, encoding='utf-8-sig')
        latest = history.tail(50)
        # Reverse so the most recent entry is at the top.
        return latest.iloc[::-1]
    except Exception as e:
        print(f"Error loading logs: {e}")
        return _empty_history()
def clear_logs():
    """Delete the log CSV and recreate it with only the header row.

    The delete-and-recreate step runs under the same file lock that
    ``save_to_csv`` takes, so a save in progress cannot interleave with it.

    Returns:
        pandas.DataFrame: An empty frame with the log columns on success.
        On failure (including a lock timeout) the error is printed and the
        current logs from ``load_logs()`` are returned instead.
    """
    try:
        with FileLock(LOCK_FILE, timeout=10):
            if os.path.exists(CSV_FILE):
                os.remove(CSV_FILE)
            initialize_csv()
        return pd.DataFrame(columns=[
            "Timestamp", "Text", "Language", "Sentiment", "Confidence"
        ])
    except Exception as e:
        print(f"Error clearing logs: {e}")
        return load_logs()
| # ========================================== | |
| # MAIN ANALYSIS FUNCTION | |
| # ========================================== | |
def process_sentiment(text):
    """Analyse one piece of feedback and produce every value the UI shows.

    Blank input skips analysis and logging. Otherwise the language is
    detected, the sentiment scored, the result appended to the CSV log, and
    the log reloaded.

    Args:
        text: The feedback entered by the user.

    Returns:
        tuple: ``(sentiment text, confidence text, language, detail
        markdown, recent logs, CSV path)``. The first four items are empty
        strings for blank input.
    """
    is_blank = not text or not text.strip()
    if is_blank:
        return ("", "", "", "", load_logs(), CSV_FILE)

    language = detect_language(text)
    sentiment, confidence = analyze_sentiment(text, language)

    # Display strings for the result widgets.
    emoji = get_sentiment_emoji(sentiment)
    result_text = f"{emoji} {sentiment}"
    confidence_text = f"{confidence:.2%}"
    detail = "\n".join([
        f"**Language:** {language}",
        f"**Sentiment:** {sentiment}",
        f"**Confidence:** {confidence:.4f}",
    ])

    # Persist first so the reloaded history already includes this entry.
    save_to_csv(text, language, sentiment, confidence)
    logs = load_logs()

    return (result_text, confidence_text, language, detail, logs, CSV_FILE)
| # ========================================== | |
| # GRADIO INTERFACE | |
| # ========================================== | |
# Make sure the log CSV exists before the UI reads it.
initialize_csv()

# NOTE(review): this section reached review with its indentation stripped.
# The row/column nesting below is reconstructed from the section comments
# (left column, right column, bottom section) — confirm it matches the
# intended layout.
with gr.Blocks(title="Sentiment Analysis - Student Feedback") as demo:
    # Header
    gr.Markdown("""
    # 🎓 Student Feedback Sentiment Analysis
    ### Multilingual Support: English • اردو • Roman Urdu
    """)
    gr.Markdown("---")

    # Main content
    with gr.Row():
        # Left column - Input
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Enter Feedback")
            input_text = gr.Textbox(
                label="Student Feedback",
                placeholder="Enter feedback in English, Urdu, or Roman Urdu...\nPress Enter or click Analyze",
                lines=5,
                max_lines=10
            )
            with gr.Row():
                analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", scale=2)
                clear_btn = gr.Button("🗑️ Clear Logs", variant="secondary", scale=1)

        # Right column - Results
        with gr.Column(scale=1):
            gr.Markdown("### 📊 Analysis Results")
            with gr.Row():
                sentiment_output = gr.Textbox(
                    label="Sentiment",
                    interactive=False
                )
                confidence_output = gr.Textbox(
                    label="Confidence",
                    interactive=False
                )
            language_output = gr.Textbox(
                label="Detected Language",
                interactive=False
            )
            detail_output = gr.Markdown(
                label="Details",
                value=""
            )

    # Bottom section - Logs and Export
    gr.Markdown("---")
    gr.Markdown("### 📋 Analysis History")
    with gr.Row():
        logs_display = gr.Dataframe(
            headers=["Timestamp", "Text", "Language", "Sentiment", "Confidence"],
            datatype=["str", "str", "str", "str", "number"],
            label="Recent Analyses",
            wrap=True,
            interactive=False,
            value=load_logs()
        )
    with gr.Row():
        export_file = gr.File(
            label="📥 Download Complete Logs (CSV)",
            value=CSV_FILE,
            interactive=False
        )
    gr.Markdown("""
    **💡 Tips:**
    - Type your feedback and press **Enter** or click **Analyze**
    - Supports English, Urdu (اردو), and Roman Urdu
    - All analyses are automatically saved
    - Download CSV for complete history
    """)

    # Model information
    gr.Markdown("---")
    with gr.Accordion("ℹ️ Model Information", open=False):
        # Blank lines separate each bold heading from the list before it;
        # without them Markdown folds the heading into the last list item.
        # NOTE(review): the wording of the "Important Note" below is
        # user-facing and reads as dismissive — owner to reconsider.
        gr.Markdown("""
        **Models Used:**
        - **English:** tahamueed23/sentiment_roberta_english_finetuned
        - **Urdu & Roman Urdu:** tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu

        **Features:**
        - Automatic language detection
        - High-accuracy sentiment classification
        - Real-time analysis
        - CSV export for data analysis
        - Support for mixed feedback in different languages

        **Important Note:**
        - If you’re facing problems, it’s because you didn’t learn, so go educate yourself before others😊.
        """)

    # Event handlers

    # Components refreshed after every analysis; shared by both triggers so
    # the button and the submit event cannot drift apart.
    _analysis_outputs = [
        sentiment_output,
        confidence_output,
        language_output,
        detail_output,
        logs_display,
        export_file,
    ]

    def process_and_update(text):
        """Event callback: delegate to ``process_sentiment``."""
        return process_sentiment(text)

    def _clear_and_refresh():
        """Event callback: clear the logs and refresh table and download.

        Returns the frame from ``clear_logs()`` for the history table, plus
        the CSV path so the download component is handed the re-created
        file, as it is after every analysis.
        """
        return clear_logs(), CSV_FILE

    # Click event
    analyze_btn.click(
        fn=process_and_update,
        inputs=[input_text],
        outputs=_analysis_outputs
    )

    # Textbox submit event
    input_text.submit(
        fn=process_and_update,
        inputs=[input_text],
        outputs=_analysis_outputs
    )

    # Clear logs event
    clear_btn.click(
        fn=_clear_and_refresh,
        inputs=[],
        outputs=[logs_display, export_file]
    )
| # Launch the app | |
if __name__ == "__main__":
    # Startup banner: a 50-character rule above and below the title.
    banner_rule = "=" * 50
    print("\n" + banner_rule)
    print("🚀 Starting Sentiment Analysis Application")
    print(banner_rule + "\n")

    # Listen on all interfaces at port 7860, without a public share link.
    launch_options = {
        "share": False,
        "show_error": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
    }
    demo.launch(**launch_options)