Spaces:

tahamueed23
/

Sentiment-Analyzer

Running

App Files Community

tahamueed23 committed on Nov 29, 2025

Commit

80f282b

verified ·

1 Parent(s): 7861917

Update app.py

Browse files

Files changed (1) hide show

app.py +120 -492

app.py CHANGED Viewed

@@ -1,535 +1,163 @@
 import gradio as gr
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import pandas as pd
 import os
-import re
-from filelock import FileLock
-import torch
-# -----------------------------
-# Load Models with Error Handling
-# -----------------------------
-try:
-    # English sentiment model
-    english_model = pipeline(
-        "sentiment-analysis",
-        model="siebert/sentiment-roberta-large-english"
-    )
-    # Urdu sentiment model
-    urdu_model = pipeline(
-        "sentiment-analysis",
-        model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
-    )
-    # Roman Urdu sentiment model
-    roman_urdu_model = pipeline(
-        "sentiment-analysis",
-        model="tahamueed23/roman-urdu-sentiment"
-    )
-    # Language detection model
-    lang_detector = pipeline(
-        "text-classification",
-        model="papluca/xlm-roberta-base-language-detection"
-    )
-    print("✅ All models loaded successfully!")
-except Exception as e:
-    print(f"❌ Error loading models: {e}")
-    raise
-# -----------------------------
-# Roman Urdu Word Databases
-# -----------------------------
-ROMAN_URDU_POSITIVE_WORDS = {
-    'acha', 'achy', 'achay', 'achi', 'behtar', 'zabardast', 'shandaar', 'umdah', 'umda',
-    'behtareen', 'kamaal', 'lajawab', 'mazedar', 'khush', 'khushi', 'pasand', 'pasandida',
-    'pyaara', 'pyaari', 'dilchasp', 'mufeed', 'pursukoon', 'roshan', 'saaf', 'suthri',
-    'tareef', 'targheeb', 'madadgar', 'dostana', 'jawab', 'khoob', 'khoobsurat', 'heran',
-    'mast', 'rangeen', 'sundar', 'sohna', 'sohni', 'pyara', 'pyari', 'meetha', 'meethi',
-    'mitha', 'mithi', 'azhar', 'badtameez', 'accha', 'acchi', 'acche'
-}
-ROMAN_URDU_NEGATIVE_WORDS = {
-    'kharab', 'bura', 'ganda', 'sust', 'kamzor', 'mushkil', 'naqis', 'namukammal',
-    'mayus', 'nakara', 'bekaar', 'bemisi', 'bepanah', 'beparwah', 'behos', 'bekhauf',
-    'bekhudi', 'bekhabar', 'bekasoor', 'bekar', 'bemari', 'bezaar', 'badsurat', 'badtameez',
-    'kameena', 'nalaiq', 'nakara', 'ghatiya', 'bakwas', 'bewakoof', 'ahmaq', 'murda',
-    'zaleel', 'kambakht', 'laanat', 'harami', 'bad', 'worst', 'waste', 'rubbish'
-}
-ROMAN_URDU_NEUTRAL_WORDS = {
-    'hai', 'hain', 'tha', 'thi', 'ho', 'hun', 'hein', 'main', 'tum', 'wo', 'ye', 'unhon',
-    'inhon', 'sath', 'lekin', 'kyun', 'jaisa', 'waisa', 'jese', 'wese', 'phir', 'ab', 'toh',
-    'ka', 'ki', 'ke', 'ko', 'se', 'mein', 'par', 'aur', 'ya', 'kya', 'kuch', 'sab', 'apna'
-}
-# Compile regex patterns for faster matching
-roman_urdu_positive_pattern = re.compile(r'\b(' + '|'.join(ROMAN_URDU_POSITIVE_WORDS) + r')\b', re.IGNORECASE)
-roman_urdu_negative_pattern = re.compile(r'\b(' + '|'.join(ROMAN_URDU_NEGATIVE_WORDS) + r')\b', re.IGNORECASE)
-# -----------------------------
-# Enhanced Language Detection
-# -----------------------------
-def detect_language_advanced(text):
-    """Advanced language detection using model + rules"""
-    if not text.strip():
-        return "English"
-    text_clean = text.strip()
-    # Step 1: Urdu script detection (most reliable)
-    if re.search(r'[\u0600-\u06FF]', text_clean):
-        return "Urdu"
-    # Step 2: Use transformer model for language detection
-    try:
-        # Truncate very long texts to avoid model limits
-        truncated_text = text_clean[:250]
-        lang_result = lang_detector(truncated_text)[0]
-        lang_label = lang_result['label'].upper()
-        lang_score = lang_result['score']
-        # Map model outputs to our language categories
-        lang_map = {
-            'UR': 'Urdu',
-            'EN': 'English',
-            'Ro-Ur': 'English',  # Hindi often mixed with Roman Urdu
-        }
-        detected_lang = lang_map.get(lang_label, 'English')
-        # Step 3: For Urdu/English detection, apply Roman Urdu rules
-        if detected_lang in ['Urdu', 'English']:
-            if is_likely_roman_urdu(text_clean):
-                return "Roman Urdu"
-        return detected_lang
-    except Exception as e:
-        print(f"Language detection model error: {e}")
-        # Fallback to rule-based detection
-        return detect_language_fallback(text_clean)
-def is_likely_roman_urdu(text):
-    """Check if text is likely Roman Urdu using comprehensive rules"""
-    text_lower = text.lower()
-    # Count Roman Urdu specific words
-    positive_hits = len(roman_urdu_positive_pattern.findall(text_lower))
-    negative_hits = len(roman_urdu_negative_pattern.findall(text_lower))
-    total_hits = positive_hits + negative_hits
-    # Count total words
-    words = re.findall(r'\b\w+\b', text_lower)
-    total_words = len(words)
-    if total_words == 0:
-        return False
-    # Rule 1: High percentage of Roman Urdu words
-    roman_urdu_ratio = total_hits / total_words
-    if roman_urdu_ratio > 0.3:  # 30% threshold
-        return True
-    # Rule 2: Specific Roman Urdu sentence structures
-    roman_urdu_patterns = [
-        r"^[a-z ]*(hai|hain|tha|thi|ho|hun|hein)[\s\.\!]*$",
-        r"^[a-z ]*(main|tum|wo|ye|unhon|inhon)[a-z ]*(hun|hein|ho|hai)[a-z ]*$",
-        r"^[a-z ]*(acha|bura|kharab|behtar|zabardast)[a-z ]*(hai|hain|tha)[a-z ]*$",
-        r"^[a-z ]*(kyun|kese|kaise|kisne|kisliye)[a-z ]*\?$",
-        r"^[a-z ]*(bohat|bahut|zyada|zyda)[a-z ]+(acha|bura|kharab|behtar)"
-    ]
-    for pattern in roman_urdu_patterns:
-        if re.search(pattern, text_lower):
-            return True
-    # Rule 3: Presence of key Roman Urdu function words
-    function_words = ['hai', 'hain', 'tha', 'thi', 'ka', 'ki', 'ke', 'ko', 'se', 'ne']
-    function_word_count = sum(1 for word in words if word in function_words)
-    if function_word_count >= 2 and total_words <= 8:
-        return True
-    return False
-def detect_language_fallback(text):
-    """Rule-based fallback language detection"""
-    text_lower = text.lower()
-    # Urdu script check
-    if re.search(r'[\u0600-\u06FF]', text):
-        return "Urdu"
-    # Roman Urdu detection
-    if is_likely_roman_urdu(text):
-        return "Roman Urdu"
-    return "English"
-# -----------------------------
-# Roman Urdu Text Processing
-# -----------------------------
-def normalize_roman_urdu(text):
-    """Normalize Roman Urdu text variations"""
-    text = text.lower().strip()
-    # Common Roman Urdu spelling variations
-    variations = {
-        r'\bhy\b': 'hai', r'\bh\b': 'hai', r'\bhe\b': 'hai',
-        r'\bnhi\b': 'nahi', r'\bnai\b': 'nahi', r'\bna\b': 'nahi',
-        r'\bboht\b': 'bohot', r'\bbhot\b': 'bohot', r'\bbahut\b': 'bohot',
-        r'\bzyada\b': 'zyada', r'\bzada\b': 'zyada', r'\bzyda\b': 'zyada',
-        r'\bacha\b': 'acha', r'\bachay\b': 'achay', r'\bacchi\b': 'achi',
-        r'\bacche\b': 'achay', r'\bthy\b': 'thay', r'\bthi\b': 'thi',
-        r'\btha\b': 'tha', r'\bmje\b': 'mujhe', r'\btuje\b': 'tujhe',
-        r'\busi\b': 'ussi', r'\besi\b': 'essi', r'\bwohi\b': 'wohi',
-        r'\bkisi\b': 'kisi', r'\bkuch\b': 'kuch', r'\bsab\b': 'sab',
-        r'\bme\b': 'main', r'\bmai\b': 'main', r'\btu\b': 'tum',
-        r'\buss\b': 'us', r'\biss\b': 'is'
-    }
-    for pattern, replacement in variations.items():
-        text = re.sub(pattern, replacement, text)
-    return text
-# -----------------------------
-# Roman Urdu Sentiment Correction
-# -----------------------------
-def correct_roman_urdu_sentiment(text, current_sentiment, current_score):
-    """Apply Roman Urdu specific sentiment corrections"""
-    text_lower = text.lower()
-    normalized_text = normalize_roman_urdu(text_lower)
-    # Count positive and negative words
-    positive_matches = roman_urdu_positive_pattern.findall(normalized_text)
-    negative_matches = roman_urdu_negative_pattern.findall(normalized_text)
-    positive_count = len(positive_matches)
-    negative_count = len(negative_matches)
-    # Strong positive indicators
-    strong_positive_indicators = ['acha', 'achy', 'achay', 'achi', 'zabardast', 'shandaar', 'kamaal']
-    strong_negative_indicators = ['kharab', 'bura', 'ganda', 'bekaar', 'badtameez']
-    # Rule 1: If text contains strong positive words but model says negative, correct it
-    has_strong_positive = any(indicator in normalized_text for indicator in strong_positive_indicators)
-    has_strong_negative = any(indicator in normalized_text for indicator in strong_negative_indicators)
-    if has_strong_positive and current_sentiment == "Negative":
-        return "Positive", max(current_score, 0.85)
-    if has_strong_negative and current_sentiment == "Positive":
-        return "Negative", max(current_score, 0.85)
-    # Rule 2: Word count based correction
-    if positive_count > negative_count and current_sentiment == "Negative":
-        new_score = min(0.8 + (positive_count * 0.05), 0.95)
-        return "Positive", new_score
-    if negative_count > positive_count and current_sentiment == "Positive":
-        new_score = min(0.8 + (negative_count * 0.05), 0.95)
-        return "Negative", new_score
-    # Rule 3: Mixed sentiments with clear majority
-    total_sentiment_words = positive_count + negative_count
-    if total_sentiment_words >= 2:
-        positive_ratio = positive_count / total_sentiment_words
-        if positive_ratio >= 0.7 and current_sentiment != "Positive":
-            return "Positive", 0.8
-        elif positive_ratio <= 0.3 and current_sentiment != "Negative":
-            return "Negative", 0.8
-    return current_sentiment, current_score
-# -----------------------------
-# Enhanced Ensemble for Roman Urdu
-# -----------------------------
-def ensemble_roman_urdu_sentiment(text):
-    """Advanced ensemble method for Roman Urdu sentiment analysis"""
-    normalized_text = normalize_roman_urdu(text)
-    try:
-        # Get predictions from both Roman Urdu and Urdu models
-        ru_result = roman_urdu_model(normalized_text)[0]
-        ur_result = urdu_model(normalized_text)[0]
-        # Normalize labels
-        ru_sentiment = normalize_sentiment_label(ru_result["label"])
-        ur_sentiment = normalize_sentiment_label(ur_result["label"])
-        ru_score = ru_result["score"]
-        ur_score = ur_result["score"]
-        # Apply Roman Urdu corrections to both results
-        ru_sentiment_corrected, ru_score_corrected = correct_roman_urdu_sentiment(text, ru_sentiment, ru_score)
-        ur_sentiment_corrected, ur_score_corrected = correct_roman_urdu_sentiment(text, ur_sentiment, ur_score)
-        # If both models agree after correction
-        if ru_sentiment_corrected == ur_sentiment_corrected:
-            final_score = max(ru_score_corrected, ur_score_corrected)
-            return {"label": ru_sentiment_corrected, "score": final_score}
-        # Weighted voting with higher weight for Roman Urdu model
-        ru_weight = ru_score_corrected * 1.6  # Higher weight for Roman Urdu model
-        ur_weight = ur_score_corrected * 1.2
-        if ru_weight > ur_weight:
-            return {"label": ru_sentiment_corrected, "score": ru_score_corrected}
-        else:
-            return {"label": ur_sentiment_corrected, "score": ur_score_corrected}
-    except Exception as e:
-        print(f"Ensemble error: {e}")
-        # Fallback to Roman Urdu model with correction
-        try:
-            result = roman_urdu_model(normalize_roman_urdu(text))[0]
-            corrected_sentiment, corrected_score = correct_roman_urdu_sentiment(
-                text, normalize_sentiment_label(result["label"]), result["score"]
-            )
-            return {"label": corrected_sentiment, "score": corrected_score}
-        except:
-            return {"label": "Neutral", "score": 0.5}
-# -----------------------------
-# Sentiment Analysis Core Functions
-# -----------------------------
-def normalize_sentiment_label(label):
-    """Normalize sentiment labels from different models"""
-    label = str(label).lower()
-    if any(word in label for word in ["pos", "positive", "positive", "lab"]):
-        return "Positive"
-    elif any(word in label for word in ["neg", "negative", "negative"]):
-        return "Negative"
-    else:
-        return "Neutral"
-def get_strong_sentiment_words(text, language):
-    """Extract strong sentiment-bearing words"""
-    text_lower = text.lower()
-    strong_words = []
-    if language == "Roman Urdu":
-        # Use our Roman Urdu word databases
-        positive_matches = roman_urdu_positive_pattern.findall(text_lower)
-        negative_matches = roman_urdu_negative_pattern.findall(text_lower)
-        strong_words = positive_matches + negative_matches
-    elif language == "Urdu":
-        # Urdu strong words (you can expand this list)
-        urdu_positive = ['زبردست', 'شاندار', 'عمدہ', 'بہترین', 'اچھا']
-        urdu_negative = ['خراب', 'برا', 'مایوس کن', 'بیکار']
-        for word in urdu_positive + urdu_negative:
-            if word in text:
-                strong_words.append(word)
-    else:  # English
-        english_positive = ['excellent', 'outstanding', 'amazing', 'wonderful', 'perfect', 'great']
-        english_negative = ['terrible', 'awful', 'horrible', 'disappointing', 'poor', 'bad']
-        for word in english_positive + english_negative:
-            if re.search(r'\b' + re.escape(word) + r'\b', text_lower):
-                strong_words.append(word)
-    return list(set(strong_words))[:5]  # Return unique words, max 5
-def generate_detailed_explanation(text, sentiment, score, language, strong_words):
-    """Generate detailed explanation for sentiment analysis"""
-    confidence_level = "High" if score >= 0.8 else "Medium" if score >= 0.6 else "Low"
-    base_explanations = {
-        "Positive": {
-            "High": "Strong positive sentiment with clear positive expressions.",
-            "Medium": "Moderately positive sentiment with favorable tone.",
-            "Low": "Slightly positive leaning with some positive indicators."
-        },
-        "Negative": {
-            "High": "Strong negative sentiment with clear criticism.",
-            "Medium": "Moderately negative sentiment with critical tone.",
-            "Low": "Slightly negative leaning with some concerning indicators."
-        },
-        "Neutral": {
-            "High": "Clearly neutral or factual statement.",
-            "Medium": "Mostly neutral with balanced perspective.",
-            "Low": "Weak sentiment leaning neutral."
-        }
-    }
-    explanation = base_explanations[sentiment][confidence_level]
-    # Add language specific notes
-    if language == "Roman Urdu":
-        explanation += " Analyzed with Roman Urdu specific rules."
-        # Special note for common corrections
-        if any(word in text.lower() for word in ['acha', 'achy', 'achay', 'achi']):
-            if sentiment == "Positive":
-                explanation += " Words like 'acha' correctly identified as positive."
-    # Add strong words information
-    if strong_words:
-        explanation += f" Key sentiment words: {', '.join(strong_words)}."
-    explanation += f" Confidence: {score:.3f}"
-    return explanation
-# -----------------------------
-# Main Analysis Function
-# -----------------------------
-SAVE_FILE = "sentiment_logs.csv"
-LOCK_FILE = SAVE_FILE + ".lock"
-if not os.path.exists(SAVE_FILE):
-    pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"]).to_csv(
-        SAVE_FILE, index=False, encoding="utf-8-sig"
-    )
-def analyze_sentiment_complete(text, lang_hint):
-    """Complete sentiment analysis pipeline"""
-    if not text.strip():
-        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE, ""
-    # Detect language
-    language = lang_hint if lang_hint != "Auto Detect" else detect_language_advanced(text)
-    try:
-        # Perform sentiment analysis based on language
-        if language == "English":
-            result = english_model(text[:512])[0]
-            sentiment = normalize_sentiment_label(result["label"])
-            score = round(float(result["score"]), 3)
-        elif language == "Urdu":
-            result = urdu_model(text[:512])[0]
-            sentiment = normalize_sentiment_label(result["label"])
-            score = round(float(result["score"]), 3)
-        else:  # Roman Urdu
-            result = ensemble_roman_urdu_sentiment(text)
-            sentiment = result["label"]
-            score = round(float(result["score"]), 3)
-        # Get strong words
-        strong_words = get_strong_sentiment_words(text, language)
-        strong_words_str = ", ".join(strong_words) if strong_words else "None"
-        # Generate explanation
-        explanation = generate_detailed_explanation(text, sentiment, score, language, strong_words)
-        # Save to CSV
-        with FileLock(LOCK_FILE):
-            df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig") if os.path.exists(SAVE_FILE) else pd.DataFrame(
-                columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"]
-            )
-            new_row = pd.DataFrame([[
-                text, language, sentiment, score, strong_words_str, pd.Timestamp.now()
-            ]], columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
-            df = pd.concat([df, new_row], ignore_index=True)
-            df.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
-        return sentiment, str(score), explanation, SAVE_FILE, strong_words_str
-    except Exception as e:
-        error_msg = f"Analysis error: {str(e)}"
-        return "Error", "0", error_msg, SAVE_FILE, ""
-# -----------------------------
-# Gradio Interface
-# -----------------------------
 def show_logs():
-    if os.path.exists(SAVE_FILE):
-        df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
-        return df.tail(20)
-    else:
-        return pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
 def clear_logs():
-    if os.path.exists(SAVE_FILE):
-        os.remove(SAVE_FILE)
-    return pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
 with gr.Blocks(title="Multilingual Sentiment Analysis") as demo:
     gr.Markdown("""
-    # 🌍 Advanced Multilingual Sentiment Analysis
-    **English • Urdu • Roman Urdu**
-    Uses transformer models for accurate language detection and sentiment analysis with specialized Roman Urdu handling.
-    **Used models:**
-    - English: siebert/sentiment-roberta-large-english
-    - Urdu: tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu
-    - Roman Urdu: tahamueed23/roman-urdu-sentiment
-    - Language detection: papluca/xlm-roberta-base-language-detection
     """)
-    # -----------------------------
-    # TOP ROW (Two Blocks)
-    # -----------------------------
     with gr.Row():
-        # Left block (Text input + buttons)
         with gr.Column(scale=1):
             user_text = gr.Textbox(
-                label="✍️ Enter Text",
-                placeholder="Type in English, Urdu, or Roman Urdu...",
                 lines=3
             )
             lang_dropdown = gr.Dropdown(
                 ["Auto Detect", "English", "Urdu", "Roman Urdu"],
                 value="Auto Detect",
-                label="🌐 Language Selection"
             )
             with gr.Row():
-                btn_analyze = gr.Button("🔍 Analyze Sentiment", variant="primary")
-                btn_show = gr.Button("📂 Show Logs")
-                btn_clear = gr.Button("🗑️ Clear Logs")
-        # Right block (output results)
         with gr.Column(scale=1):
-            out_sent = gr.Textbox(label="🎭 Sentiment")
-            out_conf = gr.Textbox(label="📊 Confidence Score")
-            out_exp = gr.Textbox(label="💡 Detailed Explanation", lines=4)
-            out_strong = gr.Textbox(label="💪 Strong Words")
-            out_file = gr.File(label="⬇️ Download Logs")
-    # -----------------------------
-    # BOTTOM ROW (Two Blocks)
-    # -----------------------------
     with gr.Row():
-        # Left block (analysis history)
         with gr.Column(scale=1):
             logs_df = gr.Dataframe(
                 headers=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"],
-                label="📋 Analysis History",
                 interactive=False,
                 wrap=True,
                 height=350
             )
-        # Right block can be empty or used later
         with gr.Column(scale=1):
             gr.Markdown("")
-    # Event handlers
-            out_exp = gr.Textbox(label="💡 Detailed Explanation")
-            out_strong = gr.Textbox(label="🔥 Strong Sentiment Words")
-            out_file = gr.File(label="📁 Log File")
     btn_analyze.click(
         analyze_sentiment_complete,
         inputs=[user_text, lang_dropdown],
-        outputs=[out_sent, out_conf, out_exp, out_file, out_strong]
     )
-    btn_show.click(show_logs, outputs=[gr.Dataframe()])
-    btn_clear.click(clear_logs, outputs=[gr.Dataframe()])
-demo.launch()
-if __name__ == "__main__":
-    demo.launch(share=False)

 import gradio as gr
 import pandas as pd
+import numpy as np
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+from datetime import datetime
 import os
+# ------------------------------------------------------------
+# LOAD MODELS
+# ------------------------------------------------------------
+lang_detector_name = "papluca/xlm-roberta-base-language-detection"
+eng_model_name = "siebert/sentiment-roberta-large-english"
+urdu_model_name = "tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
+roman_model_name = "tahamueed23/roman-urdu-sentiment"
+lang_pipe = pipeline("text-classification", model=lang_detector_name, tokenizer=lang_detector_name)
+eng_pipe = pipeline("sentiment-analysis", model=eng_model_name, tokenizer=eng_model_name)
+urdu_pipe = pipeline("text-classification", model=urdu_model_name, tokenizer=urdu_model_name)
+roman_pipe = pipeline("text-classification", model=roman_model_name, tokenizer=roman_model_name)
+# ------------------------------------------------------------
+# LOG STORAGE
+# ------------------------------------------------------------
+LOG_FILE = "analysis_logs.csv"
+if not os.path.exists(LOG_FILE):
+    df = pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
+    df.to_csv(LOG_FILE, index=False)
+def save_log(sentence, lang, sent, conf, strong_words):
+    df = pd.read_csv(LOG_FILE)
+    df.loc[len(df)] = [sentence, lang, sent, conf, strong_words, datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
+    df.to_csv(LOG_FILE, index=False)
 def show_logs():
+    return pd.read_csv(LOG_FILE)
 def clear_logs():
+    df = pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"])
+    df.to_csv(LOG_FILE, index=False)
+    return df
+# ------------------------------------------------------------
+# SENTIMENT HELPERS
+# ------------------------------------------------------------
+def detect_language(text):
+    res = lang_pipe(text)[0]['label']
+    if res.lower() in ["ur", "urd"]:
+        return "Urdu"
+    if res.lower() in ["en", "eng"]:
+        return "English"
+    return "Roman Urdu"
+def extract_strong_words(text):
+    words = text.split()
+    strong = [w for w in words if w.isupper() or w.endswith("!!!")]
+    return ", ".join(strong) if strong else "None"
+# ------------------------------------------------------------
+# MAIN ANALYSIS FUNCTION
+# ------------------------------------------------------------
+def analyze_sentiment_complete(text, selected_lang):
+    if selected_lang == "Auto Detect":
+        lang = detect_language(text)
+    else:
+        lang = selected_lang
+    if lang == "English":
+        result = eng_pipe(text)[0]
+        sentiment = result["label"]
+        score = round(float(result["score"]), 4)
+    elif lang == "Urdu":
+        result = urdu_pipe(text)[0]
+        sentiment = result["label"]
+        score = round(float(result["score"]), 4)
+    else:  # Roman Urdu
+        result = roman_pipe(text)[0]
+        sentiment = result["label"]
+        score = round(float(result["score"]), 4)
+    strong_words = extract_strong_words(text)
+    explanation = f"Language: {lang}\nStrong indicators: {strong_words}\nThe model predicts: {sentiment}"
+    save_log(text, lang, sentiment, score, strong_words)
+    return sentiment, score, explanation, LOG_FILE, strong_words
+# ------------------------------------------------------------
+# GRADIO UI LAYOUT (Final Updated Version)
+# ------------------------------------------------------------
 with gr.Blocks(title="Multilingual Sentiment Analysis") as demo:
     gr.Markdown("""
+    # Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
+    Transformer-based sentiment classification with auto language detection.
     """)
+    # ---------------- TOP ROW ----------------
     with gr.Row():
+        # LEFT: Input controls
         with gr.Column(scale=1):
             user_text = gr.Textbox(
+                label="Enter text",
+                placeholder="Type English, Urdu, or Roman Urdu...",
                 lines=3
             )
             lang_dropdown = gr.Dropdown(
                 ["Auto Detect", "English", "Urdu", "Roman Urdu"],
                 value="Auto Detect",
+                label="Language Selection"
             )
             with gr.Row():
+                btn_analyze = gr.Button("Analyze Sentiment", variant="primary")
+                btn_show = gr.Button("Show Logs")
+                btn_clear = gr.Button("Clear Logs")
+        # RIGHT: Output panel
         with gr.Column(scale=1):
+            out_sent = gr.Textbox(label="Sentiment", interactive=False)
+            out_score = gr.Textbox(label="Confidence Score", interactive=False)
+            out_explain = gr.Textbox(label="Detailed Explanation", lines=5, interactive=False)
+            out_file = gr.File(label="Download Logs", interactive=False)
+            out_words = gr.Textbox(label="Strong Words", interactive=False)
+    # ---------------- BOTTOM ROW ----------------
     with gr.Row():
+        # LEFT: History table
         with gr.Column(scale=1):
             logs_df = gr.Dataframe(
                 headers=["Sentence", "Language", "Sentiment", "Confidence", "Strong_Words", "Timestamp"],
+                label="Analysis History",
                 interactive=False,
                 wrap=True,
                 height=350
             )
+        # RIGHT empty or for future extensions
         with gr.Column(scale=1):
             gr.Markdown("")
+    # ---------------- BUTTON ACTIONS ----------------
     btn_analyze.click(
         analyze_sentiment_complete,
         inputs=[user_text, lang_dropdown],
+        outputs=[out_sent, out_score, out_explain, out_file, out_words]
     )
+    btn_show.click(show_logs, outputs=[logs_df])
+    btn_clear.click(clear_logs, outputs=[logs_df])
+# Run app
+demo.launch()