Spaces:

tahamueed23
/

Sentiment-Analyzer

Sleeping

App Files Files Community

tahamueed23 committed on Dec 27, 2025

Commit

fd36e32

verified ·

1 Parent(s): 55d0499

Update app.py

Browse files

Files changed (1) hide show

app.py +471 -40

app.py CHANGED Viewed

@@ -1,50 +1,481 @@
 import gradio as gr
-import pandas as pd
 from transformers import pipeline
-# Load models
-urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
-roman_urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
-english_model = pipeline("sentiment-analysis", model="tahamueed23/sentiment_roberta_english_finetuned")
-# DataFrame to store results
-results_df = pd.DataFrame(columns=["Sentence", "Sentiment"])
-def analyze_sentiment(sentence):
-    global results_df
-    if any(unicode.isdigit() for unicode in sentence):
-        return "Invalid input. Please enter a valid sentence without numbers."
-    if all('\u0600' <= char <= '\u06FF' for char in sentence):  # Check if Urdu
-        model = urdu_model
-    elif all('a' <= char <= 'z' or 'A' <= char <= 'Z' for char in sentence):  # Check if English
-        model = english_model
-    elif any(char.isalpha() for char in sentence):  # Check if Roman Urdu
-        model = roman_urdu_model
     else:
-        return "Unsupported language."
-    sentiment = model(sentence)[0]
-    results_df = results_df.append({"Sentence": sentence, "Sentiment": sentiment['label']}, ignore_index=True)
-    return sentiment['label']
-def save_to_csv():
-    if results_df.empty:
-        return "No results to save."
-    results_df.to_csv("sentiment_analysis_results.csv", index=False)
-    return "Results saved to sentiment_analysis_results.csv."
-# Create Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Sentiment Analysis Tool")
-    input_text = gr.TextArea(label="Enter your sentence here:", placeholder="Type sentence...")
-    analyze_button = gr.Button("Analyze")
-    sentiment_output = gr.Label(label="Sentiment Output")
-    analyze_button.click(fn=analyze_sentiment, inputs=input_text, outputs=sentiment_output)
-    save_button = gr.Button("Save to CSV")
-    save_output = gr.Label(label="Save Output")
-    save_button.click(fn=save_to_csv, outputs=save_output)
-demo.launch(debug=True)

 import gradio as gr
 from transformers import pipeline
+import pandas as pd
+import os
+import re
+from datetime import datetime
+from filelock import FileLock
+import unicodedata
+# ==========================================
+# MODEL LOADING
+# ==========================================
+# Both pipelines are built once, at module level, before any handler is defined.
+print("🔄 Loading models...")
+try:
+    # Sentiment pipeline used for English text
+    english_model = pipeline(
+        "sentiment-analysis",
+        model="tahamueed23/sentiment_roberta_english_finetuned"
+    )
+    # A single pipeline is shared by Urdu-script and Roman Urdu text
+    urdu_roman_model = pipeline(
+        "sentiment-analysis",
+        model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
+    )
+    print("✅ All models loaded successfully!")
+except Exception as e:
+    # Report the failure, then re-raise so startup stops instead of
+    # continuing without the models
+    print(f"❌ Error loading models: {e}")
+    raise
+# ==========================================
+# LANGUAGE DETECTION
+# ==========================================
+def contains_urdu_script(text):
+    """Return True when *text* contains at least one Arabic-script character.
+
+    The character class covers the Arabic block (U+0600-U+06FF), Arabic
+    Supplement (U+0750-U+077F), Arabic Presentation Forms-A (U+FB50-U+FDFF)
+    and Arabic Presentation Forms-B up to U+FEFC.  U+FEFF is deliberately
+    left out: it is the byte-order mark / zero-width no-break space, so a
+    stray BOM in otherwise Latin text must not make the text count as Urdu.
+    """
+    urdu_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFC]')
+    return bool(urdu_pattern.search(text))
+def is_roman_urdu(text):
+    """Heuristically decide whether Latin-script *text* is Roman Urdu.
+
+    The text is lower-cased, split into word tokens, and each token is looked
+    up in a fixed vocabulary of Roman Urdu marker words.  The number (or
+    share) of marker tokens is compared with a length-dependent threshold:
+
+    * 1-3 tokens: at least one marker
+    * 4-8 tokens: at least 25% markers
+    * 9+ tokens:  at least 20% markers
+
+    Markers that are also everyday English words ('he', 'to', 'or', ...) only
+    count when the text contains at least one unambiguous marker; otherwise
+    plain English such as "he went to school" would be reported as Roman Urdu.
+
+    Returns False when the text has no word tokens.
+    """
+    text_lower = text.lower().strip()
+    # Unambiguous Roman Urdu words
+    roman_urdu_markers = {
+        # Common verbs and helping verbs
+        'hai', 'hain', 'tha', 'thi', 'thay', 'hun', 'hoon', 'hein', 'hy',
+        # Pronouns
+        'mein', 'mai', 'tum', 'wo', 'woh', 'ye', 'yeh', 'ap', 'aap',
+        # Prepositions
+        'ka', 'ki', 'ke', 'ko', 'se', 'ne',
+        # Common words
+        'nahi', 'nhi', 'nahin', 'kya', 'kyun', 'kaise', 'kese', 'kahan', 'kab',
+        # Sentiment words
+        'acha', 'achy', 'achha', 'accha', 'achi', 'bura', 'kharab', 'behtar',
+        'zabardast', 'bekar', 'bekaar', 'bohot', 'bohat', 'bahut', 'bhot',
+        # Action words
+        'karo', 'karna', 'karein', 'kiya', 'kia', 'gaya', 'gayi', 'gaye',
+        'dena', 'lena', 'dekho', 'dekha', 'suno', 'suna', 'samjho', 'samjha',
+        # Conjunctions
+        'aur', 'lekin', 'magar', 'phir', 'toh',
+        # Time words
+        'abhi', 'kal', 'parso', 'aj', 'aaj',
+        # Common expressions
+        'sath', 'saath', 'pas', 'paas', 'dur', 'sab', 'kuch', 'koi'
+    }
+    # Roman Urdu words that are spelled like common English words; on their
+    # own they say nothing about the language of the text
+    ambiguous_markers = {
+        'he', 'ho', 'main', 'par', 'pe', 'or', 'ya', 'to', 'ab', 'door'
+    }
+    # Tokenize text
+    words = re.findall(r'\b\w+\b', text_lower)
+    if not words:
+        return False
+    # Count markers; ambiguous ones only add weight once an unambiguous
+    # marker has shown that the text really is Roman Urdu
+    marker_count = sum(1 for word in words if word in roman_urdu_markers)
+    if marker_count:
+        marker_count += sum(1 for word in words if word in ambiguous_markers)
+    marker_ratio = marker_count / len(words)
+    # Detection thresholds
+    if len(words) <= 3:
+        # For very short text, need at least one marker
+        return marker_count >= 1
+    elif len(words) <= 8:
+        # For short text, need 25% markers
+        return marker_ratio >= 0.25
+    else:
+        # For longer text, need 20% markers
+        return marker_ratio >= 0.20
+def detect_language(text):
+    """
+    Classify *text* as 'Urdu', 'Roman Urdu' or 'English'.
+
+    Empty or whitespace-only input is reported as 'English'.  Otherwise the
+    Arabic-script check runs first, then the Roman Urdu word heuristic, and
+    'English' is the fallback when neither matches.
+    """
+    stripped = text.strip() if text else ""
+    if not stripped:
+        return "English"
+    # Script detection takes precedence over the word-list heuristic
+    if contains_urdu_script(stripped):
+        return "Urdu"
+    return "Roman Urdu" if is_roman_urdu(stripped) else "English"
+# ==========================================
+# SENTIMENT ANALYSIS
+# ==========================================
+def normalize_label(label):
+    """Map a raw model label onto 'Positive', 'Negative' or 'Neutral'.
+
+    Matching is a case-insensitive substring test on str(label), checked in
+    the order pos, neg, neu.  A label that matches none of them is returned
+    unchanged (as the original object, not its string form).
+    """
+    lowered = str(label).lower()
+    # 'positive' / 'negative' / 'neutral' already contain the short stems,
+    # so testing the stem alone covers both spellings
+    if 'pos' in lowered:
+        return "Positive"
+    if 'neg' in lowered:
+        return "Negative"
+    if 'neu' in lowered:
+        return "Neutral"
     else:
+        return label
+def get_sentiment_emoji(sentiment):
+    """Look up the emoji for a normalized sentiment; unknown values give ''."""
+    emoji_by_sentiment = dict(Positive="😊", Negative="😞", Neutral="😐")
+    try:
+        return emoji_by_sentiment[sentiment]
+    except KeyError:
+        return ""
+def analyze_sentiment(text, language):
+    """
+    Run the sentiment model that matches the detected language.
+
+    Args:
+        text: Feedback text to classify.
+        language: Detected language name.  "English" selects the English
+            model; any other value selects the shared Urdu / Roman Urdu model.
+
+    Returns:
+        A (sentiment, confidence) tuple: the normalized label and the model
+        score rounded to 4 decimals, or ("Error", 0.0) if inference fails.
+    """
+    try:
+        # Cheap character cap so very long input is not tokenized in full
+        text_input = text[:512]
+        # Choose model based on language
+        model = english_model if language == "English" else urdu_roman_model
+        # A character cap does not bound the number of tokens, so also
+        # truncate at the tokenizer level.
+        # NOTE(review): 512 assumes both models accept 512-token input -
+        # confirm against the model configs.
+        result = model(text_input, truncation=True, max_length=512)[0]
+        # Extract and normalize results
+        sentiment = normalize_label(result['label'])
+        confidence = round(float(result['score']), 4)
+        return sentiment, confidence
+    except Exception as e:
+        # Best-effort: report the failure and return a sentinel result
+        print(f"Error in sentiment analysis: {e}")
+        return "Error", 0.0
+# ==========================================
+# CSV LOGGING
+# ==========================================
+# Relative path of the CSV file that stores every analysis result
+CSV_FILE = "sentiment_analysis_logs.csv"
+# Lock file handed to FileLock to guard writes to CSV_FILE
+LOCK_FILE = CSV_FILE + ".lock"
+def initialize_csv():
+    """Create the log CSV with only a header row when it is not on disk yet."""
+    if os.path.exists(CSV_FILE):
+        return
+    header_columns = ["Timestamp", "Text", "Language", "Sentiment", "Confidence"]
+    pd.DataFrame(columns=header_columns).to_csv(
+        CSV_FILE, index=False, encoding='utf-8-sig'
+    )
+def save_to_csv(text, language, sentiment, confidence):
+    """Append one analysis result to the CSV log while holding the file lock.
+
+    The row is appended instead of re-reading and rewriting the whole file.
+    A read/rewrite cycle costs time proportional to the log size on every
+    request, and round-tripping through pd.read_csv re-interprets stored
+    text (feedback consisting of "NA" or "null", for example, is parsed as a
+    missing value and written back as an empty cell).
+
+    Args:
+        text: The analysed feedback text.
+        language: Detected language name.
+        sentiment: Normalized sentiment label.
+        confidence: Model score for the label.
+
+    Returns:
+        True when the row was written, False if locking or writing failed
+        (the error is printed).
+    """
+    try:
+        with FileLock(LOCK_FILE, timeout=10):
+            # Column order matches the header written by initialize_csv()
+            new_row = pd.DataFrame([{
+                "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                "Text": text,
+                "Language": language,
+                "Sentiment": sentiment,
+                "Confidence": confidence
+            }])
+            # Emit the header only when starting a new (or empty) file
+            write_header = (
+                not os.path.exists(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
+            )
+            new_row.to_csv(
+                CSV_FILE,
+                mode='a',
+                header=write_header,
+                index=False,
+                encoding='utf-8-sig'
+            )
+            return True
+    except Exception as e:
+        print(f"Error saving to CSV: {e}")
+        return False
+def load_logs():
+    """Return the 50 newest log rows, newest first.
+
+    An empty frame with the log columns is returned when the CSV file does
+    not exist or cannot be read (the error is printed in the latter case).
+    """
+    log_columns = ["Timestamp", "Text", "Language", "Sentiment", "Confidence"]
+    try:
+        if not os.path.exists(CSV_FILE):
+            return pd.DataFrame(columns=log_columns)
+        history = pd.read_csv(CSV_FILE, encoding='utf-8-sig')
+        # tail() keeps file order; the reversed slice puts the newest row on top
+        return history.tail(50).iloc[::-1]
+    except Exception as e:
+        print(f"Error loading logs: {e}")
+        return pd.DataFrame(columns=log_columns)
+def clear_logs():
+    """Reset the CSV log to a header-only file and return an empty table.
+
+    The reset runs under the same file lock that save_to_csv() takes, so a
+    clear cannot interleave with a write from a concurrent request.  The file
+    is overwritten in place rather than removed and re-created, so the log
+    path exists throughout.
+
+    Returns:
+        An empty DataFrame with the log columns on success.  On failure the
+        error is printed and the current logs from load_logs() are returned.
+    """
+    log_columns = ["Timestamp", "Text", "Language", "Sentiment", "Confidence"]
+    try:
+        empty_logs = pd.DataFrame(columns=log_columns)
+        with FileLock(LOCK_FILE, timeout=10):
+            empty_logs.to_csv(CSV_FILE, index=False, encoding='utf-8-sig')
+        return empty_logs
+    except Exception as e:
+        print(f"Error clearing logs: {e}")
+        return load_logs()
+# ==========================================
+# MAIN ANALYSIS FUNCTION
+# ==========================================
+def process_sentiment(text):
+    """
+    Run the full pipeline for one piece of feedback.
+
+    Detects the language, classifies the sentiment, appends the result to the
+    CSV log and reloads the recent history.
+
+    Returns a 6-tuple: sentiment text with emoji, confidence as a percentage
+    string, detected language, a Markdown detail summary, the recent-logs
+    DataFrame and the CSV path.  For empty or whitespace-only input the four
+    text fields are empty strings and nothing is logged.
+    """
+    if not (text and text.strip()):
+        return "", "", "", "", load_logs(), CSV_FILE
+    language = detect_language(text)
+    sentiment, confidence = analyze_sentiment(text, language)
+    # Display strings
+    result_text = f"{get_sentiment_emoji(sentiment)} {sentiment}"
+    confidence_text = f"{confidence:.2%}"
+    detail = (
+        f"**Language:** {language}\n"
+        f"**Sentiment:** {sentiment}\n"
+        f"**Confidence:** {confidence:.4f}"
+    )
+    # Persist first so the reloaded history already contains this entry
+    save_to_csv(text, language, sentiment, confidence)
+    return result_text, confidence_text, language, detail, load_logs(), CSV_FILE
+# ==========================================
+# GRADIO INTERFACE
+# ==========================================
+# Create the header-only log file before the UI is built; the UI below
+# passes CSV_FILE as the initial value of a gr.File component.
+initialize_csv()
+# Stylesheet passed to gr.Blocks(css=...).  The .header class is used by the
+# page's HTML header, and .result-box / .confidence-box are attached to the
+# result textboxes via elem_classes.
+custom_css = """
+.container {
+    max-width: 1400px;
+    margin: auto;
+}
+.header {
+    text-align: center;
+    padding: 20px;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    color: white;
+    border-radius: 10px;
+    margin-bottom: 20px;
+}
+.result-box {
+    font-size: 24px;
+    font-weight: bold;
+    text-align: center;
+    padding: 20px;
+    border-radius: 10px;
+    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
+    color: white;
+}
+.confidence-box {
+    font-size: 20px;
+    text-align: center;
+    padding: 15px;
+    border-radius: 10px;
+    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
+    color: white;
+}
+.stats-box {
+    padding: 15px;
+    border-radius: 8px;
+    background: #f8f9fa;
+    border: 1px solid #dee2e6;
+}
+"""
+# Create Gradio interface
+# Build the UI: input column, result column, history table and CSV download.
+with gr.Blocks(css=custom_css, title="Sentiment Analysis - Student Feedback") as demo:
+    # Header
+    gr.HTML("""
+    <div class="header">
+        <h1>🎓 Student Feedback Sentiment Analysis</h1>
+        <p style="font-size: 18px; margin-top: 10px;">
+            Multilingual Support: English • اردو • Roman Urdu
+        </p>
+    </div>
+    """)
+    # Main content
+    with gr.Row():
+        # Left column - Input
+        with gr.Column(scale=1):
+            gr.Markdown("### 📝 Enter Feedback")
+            # With lines > 1 the textbox inserts a newline on plain Enter and
+            # fires its submit event on Shift+Enter, so the hint says so
+            input_text = gr.Textbox(
+                label="Student Feedback",
+                placeholder="Enter feedback in English, Urdu, or Roman Urdu...\nPress Shift+Enter or click Analyze",
+                lines=5,
+                max_lines=10
+            )
+            with gr.Row():
+                analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", scale=2)
+                clear_btn = gr.Button("🗑️ Clear Logs", variant="secondary", scale=1)
+        # Right column - Results
+        with gr.Column(scale=1):
+            gr.Markdown("### 📊 Analysis Results")
+            with gr.Row():
+                sentiment_output = gr.Textbox(
+                    label="Sentiment",
+                    interactive=False,
+                    elem_classes="result-box"
+                )
+                confidence_output = gr.Textbox(
+                    label="Confidence",
+                    interactive=False,
+                    elem_classes="confidence-box"
+                )
+            language_output = gr.Textbox(
+                label="Detected Language",
+                interactive=False
+            )
+            detail_output = gr.Markdown(
+                label="Details",
+                value=""
+            )
+    # Bottom section - Logs and Export
+    gr.Markdown("---")
+    gr.Markdown("### 📋 Analysis History")
+    with gr.Row():
+        logs_display = gr.Dataframe(
+            headers=["Timestamp", "Text", "Language", "Sentiment", "Confidence"],
+            datatype=["str", "str", "str", "str", "number"],
+            label="Recent Analyses",
+            wrap=True,
+            interactive=False,
+            value=load_logs()
+        )
+    with gr.Row():
+        export_file = gr.File(
+            label="📥 Download Complete Logs (CSV)",
+            value=CSV_FILE,
+            interactive=False
+        )
+        gr.Markdown("""
+        **💡 Tips:**
+        - Type your feedback and press **Shift+Enter** or click **Analyze**
+        - Supports English, Urdu (اردو), and Roman Urdu
+        - All analyses are automatically saved
+        - Download CSV for complete history
+        """)
+    # Model information
+    gr.Markdown("---")
+    with gr.Accordion("ℹ️ Model Information", open=False):
+        gr.Markdown("""
+        **Models Used:**
+        - **English:** tahamueed23/sentiment_roberta_english_finetuned
+        - **Urdu & Roman Urdu:** tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu
+        **Features:**
+        - Automatic language detection
+        - High-accuracy sentiment classification
+        - Real-time analysis
+        - CSV export for data analysis
+        - Support for mixed feedback in different languages
+        """)
+    # Event handlers
+    def process_and_update(text):
+        """Forward the textbox value to process_sentiment() and return its result."""
+        return process_sentiment(text)
+    # Both triggers fill the same six components, in the order in which
+    # process_sentiment() returns its values
+    analysis_outputs = [
+        sentiment_output,
+        confidence_output,
+        language_output,
+        detail_output,
+        logs_display,
+        export_file
+    ]
+    # Click event
+    analyze_btn.click(
+        fn=process_and_update,
+        inputs=[input_text],
+        outputs=analysis_outputs
+    )
+    # Textbox submit event (Shift+Enter for this multi-line textbox)
+    input_text.submit(
+        fn=process_and_update,
+        inputs=[input_text],
+        outputs=analysis_outputs
+    )
+    # Clear logs event
+    clear_btn.click(
+        fn=clear_logs,
+        inputs=[],
+        outputs=[logs_display]
+    )
+# Launch the app
+if __name__ == "__main__":
+    # Startup banner, then serve on all interfaces at port 7860
+    banner = "=" * 50
+    print("\n" + banner)
+    print("🚀 Starting Sentiment Analysis Application")
+    print(banner + "\n")
+    launch_options = {
+        "share": False,
+        "show_error": True,
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+    }
+    demo.launch(**launch_options)