import gradio as gr from transformers import pipeline import pandas as pd import os import re from datetime import datetime from filelock import FileLock import unicodedata # ========================================== # MODEL LOADING # ========================================== print("πŸ”„ Loading models...") try: # Load sentiment models english_model = pipeline( "sentiment-analysis", model="tahamueed23/sentiment_roberta_english_finetuned" ) # Same model for both Urdu and Roman Urdu as per your requirements urdu_roman_model = pipeline( "sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu" ) urdu_model = pipeline( "sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu" ) print("βœ… All models loaded successfully!") except Exception as e: print(f"❌ Error loading models: {e}") raise # ========================================== # LANGUAGE DETECTION # ========================================== def contains_urdu_script(text): """Check if text contains Urdu/Arabic script""" urdu_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFF]') return bool(urdu_pattern.search(text)) def is_roman_urdu(text): """Detect Roman Urdu using comprehensive word patterns""" text_lower = text.lower().strip() # Roman Urdu specific words roman_urdu_markers = { # Common verbs and helping verbs 'hai', 'hain', 'tha', 'thi', 'thay', 'ho', 'hun', 'hoon', 'hein', 'he', 'hy', # Pronouns 'main', 'mein', 'mai', 'tum', 'wo', 'woh', 'ye', 'yeh', 'ap', 'aap', # Prepositions 'ka', 'ki', 'ke', 'ko', 'se', 'ne', 'par', 'pe', # Common words 'nahi', 'nhi', 'nahin', 'kya', 'kyun', 'kaise', 'kese', 'kahan', 'kab', # Sentiment words 'acha', 'achy', 'achha', 'accha', 'achi', 'bura', 'kharab', 'behtar', 'zabardast', 'bekar', 'bekaar', 'bohot', 'bohat', 'bahut', 'bhot', # Action words 'karo', 'karna', 'karna', 'karein', 'kiya', 'kia', 'gaya', 'gayi', 'gaye', 'dena', 'lena', 'dekho', 'dekha', 'suno', 'suna', 'samjho', 'samjha', # Conjunctions 'aur', 'or', 'lekin', 'magar', 'ya', 'phir', 'to', 'toh', # Time words 'ab', 'abhi', 'kal', 'parso', 'aj', 'aaj', # Common expressions 'sath', 'saath', 'pas', 'paas', 'dur', 'door', 'sab', 'kuch', 'koi' } # Tokenize text words = re.findall(r'\b\w+\b', text_lower) if not words: return False # Count Roman Urdu markers marker_count = sum(1 for word in words if word in roman_urdu_markers) marker_ratio = marker_count / len(words) # Detection thresholds if len(words) <= 3: # For very short text, need at least one marker return marker_count >= 1 elif len(words) <= 8: # For short text, need 25% markers return marker_ratio >= 0.25 else: # For longer text, need 20% markers return marker_ratio >= 0.20 def detect_language(text): """ Detect language with high accuracy Returns: 'English', 'Urdu', or 'Roman Urdu' """ if not text or not text.strip(): return "English" text = text.strip() # Check for Urdu script (most reliable) if contains_urdu_script(text): return "Urdu" # Check for Roman Urdu patterns if is_roman_urdu(text): return "Roman Urdu" # Default to English return "English" # ========================================== # SENTIMENT ANALYSIS # ========================================== def normalize_label(label): """Normalize sentiment labels from different models""" label_lower = str(label).lower() if 'pos' in label_lower or 'positive' in label_lower: return "Positive" elif 'neg' in label_lower or 'negative' in label_lower: return "Negative" elif 'neu' in label_lower or 'neutral' in label_lower: return "Neutral" else: return label def get_sentiment_emoji(sentiment): """Return emoji for sentiment""" emoji_map = { "Positive": "😊", "Negative": "😞", "Neutral": "😐" } return emoji_map.get(sentiment, "") def analyze_sentiment(text, language): """ Perform sentiment analysis based on detected language """ try: # Truncate text if too long text_input = text[:512] # Choose model based on language if language == "English": result = english_model(text_input)[0] else: # Urdu or Roman Urdu result = urdu_roman_model(text_input)[0] # Extract and normalize results sentiment = normalize_label(result['label']) confidence = round(float(result['score']), 4) return sentiment, confidence except Exception as e: print(f"Error in sentiment analysis: {e}") return "Error", 0.0 # ========================================== # CSV LOGGING # ========================================== CSV_FILE = "sentiment_analysis_logs.csv" LOCK_FILE = CSV_FILE + ".lock" def initialize_csv(): """Initialize CSV file if it doesn't exist""" if not os.path.exists(CSV_FILE): df = pd.DataFrame(columns=[ "Timestamp", "Text", "Language", "Sentiment", "Confidence" ]) df.to_csv(CSV_FILE, index=False, encoding='utf-8-sig') def save_to_csv(text, language, sentiment, confidence): """Save analysis result to CSV with file locking""" try: with FileLock(LOCK_FILE, timeout=10): # Read existing data if os.path.exists(CSV_FILE): df = pd.read_csv(CSV_FILE, encoding='utf-8-sig') else: df = pd.DataFrame(columns=[ "Timestamp", "Text", "Language", "Sentiment", "Confidence" ]) # Add new row new_row = pd.DataFrame([{ "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "Text": text, "Language": language, "Sentiment": sentiment, "Confidence": confidence }]) df = pd.concat([df, new_row], ignore_index=True) # Save to CSV df.to_csv(CSV_FILE, index=False, encoding='utf-8-sig') return True except Exception as e: print(f"Error saving to CSV: {e}") return False def load_logs(): """Load recent logs from CSV""" try: if os.path.exists(CSV_FILE): df = pd.read_csv(CSV_FILE, encoding='utf-8-sig') # Return last 50 entries, most recent first return df.tail(50).iloc[::-1] else: return pd.DataFrame(columns=[ "Timestamp", "Text", "Language", "Sentiment", "Confidence" ]) except Exception as e: print(f"Error loading logs: {e}") return pd.DataFrame(columns=[ "Timestamp", "Text", "Language", "Sentiment", "Confidence" ]) def clear_logs(): """Clear all logs""" try: if os.path.exists(CSV_FILE): os.remove(CSV_FILE) initialize_csv() return pd.DataFrame(columns=[ "Timestamp", "Text", "Language", "Sentiment", "Confidence" ]) except Exception as e: print(f"Error clearing logs: {e}") return load_logs() # ========================================== # MAIN ANALYSIS FUNCTION # ========================================== def process_sentiment(text): """ Main function to process sentiment analysis """ if not text or not text.strip(): return ( "", "", "", "", load_logs(), CSV_FILE ) # Detect language language = detect_language(text) # Analyze sentiment sentiment, confidence = analyze_sentiment(text, language) # Format results emoji = get_sentiment_emoji(sentiment) result_text = f"{emoji} {sentiment}" confidence_text = f"{confidence:.2%}" # Create detailed result detail = f"**Language:** {language}\n**Sentiment:** {sentiment}\n**Confidence:** {confidence:.4f}" # Save to CSV save_to_csv(text, language, sentiment, confidence) # Load updated logs logs = load_logs() return ( result_text, confidence_text, language, detail, logs, CSV_FILE ) # ========================================== # GRADIO INTERFACE # ========================================== # Initialize CSV on startup initialize_csv() # Create Gradio interface with gr.Blocks(title="Sentiment Analysis - Student Feedback") as demo: # Header gr.Markdown(""" # πŸŽ“ Student Feedback Sentiment Analysis ### Multilingual Support: English β€’ اردو β€’ Roman Urdu """) gr.Markdown("---") # Main content with gr.Row(): # Left column - Input with gr.Column(scale=1): gr.Markdown("### πŸ“ Enter Feedback") input_text = gr.Textbox( label="Student Feedback", placeholder="Enter feedback in English, Urdu, or Roman Urdu...\nPress Enter or click Analyze", lines=5, max_lines=10 ) with gr.Row(): analyze_btn = gr.Button("πŸ” Analyze Sentiment", variant="primary", scale=2) clear_btn = gr.Button("πŸ—‘οΈ Clear Logs", variant="secondary", scale=1) # Right column - Results with gr.Column(scale=1): gr.Markdown("### πŸ“Š Analysis Results") with gr.Row(): sentiment_output = gr.Textbox( label="Sentiment", interactive=False ) confidence_output = gr.Textbox( label="Confidence", interactive=False ) language_output = gr.Textbox( label="Detected Language", interactive=False ) detail_output = gr.Markdown( label="Details", value="" ) # Bottom section - Logs and Export gr.Markdown("---") gr.Markdown("### πŸ“‹ Analysis History") with gr.Row(): logs_display = gr.Dataframe( headers=["Timestamp", "Text", "Language", "Sentiment", "Confidence"], datatype=["str", "str", "str", "str", "number"], label="Recent Analyses", wrap=True, interactive=False, value=load_logs() ) with gr.Row(): export_file = gr.File( label="πŸ“₯ Download Complete Logs (CSV)", value=CSV_FILE, interactive=False ) gr.Markdown(""" **πŸ’‘ Tips:** - Type your feedback and press **Enter** or click **Analyze** - Supports English, Urdu (اردو), and Roman Urdu - All analyses are automatically saved - Download CSV for complete history """) # Model information gr.Markdown("---") with gr.Accordion("ℹ️ Model Information", open=False): gr.Markdown(""" **Models Used:** - **English:** tahamueed23/sentiment_roberta_english_finetuned - **Urdu & Roman Urdu:** tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu **Features:** - Automatic language detection - High-accuracy sentiment classification - Real-time analysis - CSV export for data analysis - Support for mixed feedback in different languages **Important Note:** - If you’re facing problems, it’s because you didn’t learn, so go educate yourself before others😊. """) # Event handlers def process_and_update(text): return process_sentiment(text) # Click event analyze_btn.click( fn=process_and_update, inputs=[input_text], outputs=[ sentiment_output, confidence_output, language_output, detail_output, logs_display, export_file ] ) # Enter key event input_text.submit( fn=process_and_update, inputs=[input_text], outputs=[ sentiment_output, confidence_output, language_output, detail_output, logs_display, export_file ] ) # Clear logs event clear_btn.click( fn=clear_logs, inputs=[], outputs=[logs_display] ) # Launch the app if __name__ == "__main__": print("\n" + "="*50) print("πŸš€ Starting Sentiment Analysis Application") print("="*50 + "\n") demo.launch( share=False, show_error=True, server_name="0.0.0.0", server_port=7860 )