Spaces:

tahamueed23
/

Sentiment-Analyzer

Sleeping

File size: 5,015 Bytes

import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re

# -----------------------------
# Load Models
# -----------------------------
# English sentiment classifier.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english"
)

# Urdu-script and Roman Urdu text are both served by the same fine-tuned
# checkpoint, so it is loaded once and shared under both names instead of
# keeping two identical copies of the weights in memory.
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
)

roman_urdu_model = urdu_model

# -----------------------------
# CSV Setup
# -----------------------------
# Path of the CSV log that collects every analysed sentence.
SAVE_FILE = "sentiment_logs.csv"

# When no log exists yet, seed it with a header-only CSV.
if not os.path.exists(SAVE_FILE):
    _header_only = pd.DataFrame(
        columns=["Sentence", "Language", "Sentiment", "Confidence"]
    )
    _header_only.to_csv(SAVE_FILE, encoding="utf-8-sig", index=False)

# -----------------------------
# Language Detection (simple rule-based)
# -----------------------------
# Unicode blocks that hold Arabic-script letters (Urdu is written in this
# script): Arabic, Arabic Supplement, Arabic Extended-A and the two
# presentation-form blocks.
_ARABIC_SCRIPT_RANGES = (
    ("\u0600", "\u06FF"),
    ("\u0750", "\u077F"),
    ("\u08A0", "\u08FF"),
    ("\uFB50", "\uFDFF"),
    ("\uFE70", "\uFEFF"),
)

# Roman Urdu cue words, matched as whole words against the lower-cased text.
# NOTE(review): "school", "college", "teacher" and "class" are also ordinary
# English words, so an English sentence containing one of them is reported as
# Roman Urdu - confirm that this is intended.
_ROMAN_URDU_RE = re.compile(
    r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad|pyar|dil|insaan"
    r"|zindagi|khushi|gham|dost|kitab|school|college|teacher|parhna|samajh"
    r"|likhna|khana|peena|soch|sapna|sabar|shukriya|khuda|allah|dua|mazak"
    r"|thaka|thori|abhi|kal|aaj|haan|na|bura|mazedaar|mehsoos|mehnat|imtihan"
    r"|class|talib|ilam|zuban|mushkil|asan)\b"
)


def _is_arabic_script_letter(ch: str) -> bool:
    """Return True if *ch* is a letter from one of the Arabic-script blocks."""
    # isalpha() filters out digits, punctuation and format characters that
    # share those blocks (e.g. the Arabic comma or U+FEFF).
    return ch.isalpha() and any(lo <= ch <= hi for lo, hi in _ARABIC_SCRIPT_RANGES)


def detect_language(text: str) -> str:
    """Guess the language of *text* with simple rules.

    Args:
        text: The sentence to classify.

    Returns:
        "Urdu" if the text contains at least one Arabic-script letter,
        "Roman Urdu" if it contains one of the known Roman Urdu cue words
        (whole-word, case-insensitive), otherwise "English". An empty string
        yields "English".
    """
    # Script check first: any Arabic-script letter wins, including letters
    # such as U+067E, U+0679, U+0686, U+06AF, U+06BE and U+06D2 that a fixed
    # hand-written alphabet easily misses.
    if any(_is_arabic_script_letter(ch) for ch in text):
        return "Urdu"
    if _ROMAN_URDU_RE.search(text.lower()):
        return "Roman Urdu"
    return "English"

# -----------------------------
# Normalize Sentiment Labels
# -----------------------------
def normalize_label(label):
    """Map a raw model label onto "Positive", "Negative" or "Neutral".

    The match is a case-insensitive substring test: any label containing
    "pos" is Positive, otherwise any label containing "neg" is Negative, and
    everything else is Neutral.
    """
    lowered = label.lower()
    # Order matters: the positive stem is tested first, so it wins when a
    # label happens to contain both stems.
    for stem, canonical in (("pos", "Positive"), ("neg", "Negative")):
        if stem in lowered:
            return canonical
    return "Neutral"

# -----------------------------
# Add Emojis + Tips
# -----------------------------
# One short emoji-prefixed tip per normalized sentiment.
_SENTIMENT_TIPS = {
    "Positive": "😊 Great! Keep spreading positivity.",
    "Negative": "😞 It seems negative. Try to focus on solutions.",
    "Neutral": "😐 Neutral statement. Could go either way.",
}


def sentiment_with_tips(sentiment):
    """Return the tip text for *sentiment*, or "" when there is none.

    Lookup is exact and case-sensitive on "Positive", "Negative" and
    "Neutral".
    """
    if sentiment in _SENTIMENT_TIPS:
        return _SENTIMENT_TIPS[sentiment]
    return ""

# -----------------------------
# Main Sentiment Function
# -----------------------------
# Column order of the CSV log.
_LOG_COLUMNS = ["Sentence", "Language", "Sentiment", "Confidence"]


def _append_log_row(text, lang, sentiment, score):
    """Append one analysed sentence to the CSV log at SAVE_FILE."""
    # A missing or zero-byte file still needs the header row (and the BOM that
    # "utf-8-sig" writes). Later appends use plain UTF-8 so that no second BOM
    # ends up in the middle of the file.
    needs_header = not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
    row = pd.DataFrame([[text, lang, sentiment, score]], columns=_LOG_COLUMNS)
    # Appending avoids re-reading and rewriting the whole log per request, and
    # never discards existing rows when the file cannot be parsed.
    row.to_csv(
        SAVE_FILE,
        mode="a",
        header=needs_header,
        index=False,
        encoding="utf-8-sig" if needs_header else "utf-8",
    )


def analyze_sentiment(text, lang_hint):
    """Classify the sentiment of *text* and record the result in the CSV log.

    Args:
        text: The sentence entered by the user.
        lang_hint: "English", "Urdu", "Roman Urdu", or "Auto Detect". A
            missing hint is treated like "Auto Detect".

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)`` of
        strings. For empty input, or when any step fails, the first element
        carries a warning/error message and the next two are empty strings.
    """
    try:
        # Covers None as well as empty / whitespace-only input.
        if not text or not text.strip():
            return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

        # Fall back to rule-based detection when no explicit language is set.
        if not lang_hint or lang_hint == "Auto Detect":
            lang = detect_language(text)
        else:
            lang = lang_hint

        # Pick the pipeline for the language; anything that is neither
        # English nor Urdu goes to the Roman Urdu pipeline.
        if lang == "English":
            model = english_model
        elif lang == "Urdu":
            model = urdu_model
        else:
            model = roman_urdu_model

        # truncation=True keeps over-long inputs within the model's maximum
        # sequence length instead of failing inside the model.
        result = model(text, truncation=True)[0]

        sentiment = normalize_label(result["label"])
        score = round(float(result["score"]), 3)
        explanation = sentiment_with_tips(sentiment)

        _append_log_row(text, lang, sentiment, score)

        return sentiment, str(score), explanation, SAVE_FILE

    except Exception as e:
        # UI boundary: report the failure in the output box rather than
        # letting the exception propagate out of the handler.
        return f"⚠️ Error: {str(e)}", "", "", SAVE_FILE

# -----------------------------
# Gradio UI
# -----------------------------
# Header text shown at the top of the page (Markdown).
_intro_markdown = "\n".join([
    "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)",
    "Detect sentiment as **Positive, Neutral, or Negative** with confidence score.",
    "",
    "📌 Features:",
    "- Choose language (or Auto Detect)",
    "- Download all results as CSV",
    "- Emojis + Tips for better understanding 🎯",
])

with gr.Blocks() as demo:
    gr.Markdown(_intro_markdown)

    with gr.Row():
        # First column: the text to analyse, the language selector and the
        # trigger button.
        with gr.Column():
            user_text = gr.Textbox(
                label="✍️ Enter text",
                placeholder="Type in English, Urdu, or Roman Urdu...",
            )
            lang_dropdown = gr.Dropdown(
                choices=["Auto Detect", "English", "Urdu", "Roman Urdu"],
                label="🌐 Language",
                value="Auto Detect",
            )
            btn = gr.Button(value="🔍 Analyze")

        # Second column: the three text results plus the downloadable log.
        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

    # The handler's four return values fill the four output components in
    # this order.
    btn.click(
        fn=analyze_sentiment,
        inputs=[user_text, lang_dropdown],
        outputs=[out_sent, out_conf, out_exp, out_file],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()