Spaces:

mrgmd01
/

Multilingual_Sentiment_Analysis

Sleeping

File size: 3,519 Bytes

b445cfc
 
c3ed96e
 
 
b445cfc
 
 
 
03b573d
b445cfc
 
 
 
03b573d
b445cfc
 
c3ed96e
 
03b573d
c3ed96e
 
 
 
 
 
 
 
 
 
 
b445cfc
 
c3ed96e
 
 
 
 
 
b445cfc
c3ed96e
b445cfc
 
 
c3ed96e
b445cfc
c3ed96e
b445cfc
c3ed96e
b445cfc
0f459d4
 
 
21a70c6
 
0f459d4
 
 
 
 
5e58202
b445cfc
5e58202
b445cfc
0f459d4
 
b445cfc
c3ed96e
b445cfc
c3ed96e
b445cfc
5e58202
c3ed96e
b445cfc
 
0f459d4
b445cfc
 
0f459d4
c3ed96e
 
 
 
b445cfc
0f459d4
c3ed96e
 
 
0f459d4
 
 
 
047160a
 
0f459d4
 
 
493a4dc
0f459d4
 
493a4dc
 
 
21a70c6
0f459d4
493a4dc
 
 
5e58202
b445cfc
5e58202

import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re

# Load models
# Each pipeline is created once at import time and reused for every request.

# English text: dedicated English RoBERTa sentiment checkpoint.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Urdu text: multilingual BERT sentiment checkpoint.
urdu_model = pipeline(
    "sentiment-analysis",
    model="mrgmd01/SA_Model_bert-base-multilingual-uncased",
)

# Roman Urdu currently uses exactly the same checkpoint as Urdu, so reuse the
# pipeline that is already loaded instead of holding a second copy of the
# same weights in memory. Replace this alias with its own pipeline(...) call
# if a dedicated Roman Urdu model becomes available.
roman_urdu_model = urdu_model

# CSV file that accumulates the submitted sentences (one "Sentence" column)
SAVE_FILE = "sentences.csv"

# First run: seed the CSV with a header row and no data rows
if not os.path.exists(SAVE_FILE):
    df = pd.DataFrame(columns=["Sentence"])
    df.to_csv(path_or_buf=SAVE_FILE, index=False)

# Detect language: English / Urdu / Roman Urdu

# Any character from the Unicode Arabic-script blocks (Arabic, Arabic
# Supplement, Presentation Forms A/B) marks the text as Urdu script. A range
# check is used rather than a hand-listed alphabet so that every Urdu letter
# (e.g. گ, ھ, ڑ, ے, پ, ٹ, چ) is recognised. The last range stops at U+FEFC so
# that the byte-order mark U+FEFF is not mistaken for script.
_URDU_SCRIPT_RE = re.compile(
    r"[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFC]"
)

# A few common Roman Urdu words, matched as whole words on lower-cased text.
_ROMAN_URDU_RE = re.compile(
    r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad)\b"
)


def detect_language(text):
    """Guess whether *text* is Urdu, Roman Urdu or English.

    The checks run in this order:

    1. Any Arabic-script character -> ``"Urdu"``. Other languages written in
       the same script (e.g. Arabic, Persian) are therefore also reported as
       ``"Urdu"``.
    2. Any whole-word match from a small Roman Urdu keyword list
       (case-insensitive) -> ``"Roman Urdu"``.
    3. Otherwise -> ``"English"`` (this includes the empty string).

    Args:
        text: The sentence to inspect.

    Returns:
        One of ``"Urdu"``, ``"Roman Urdu"`` or ``"English"``.
    """
    if _URDU_SCRIPT_RE.search(text):
        return "Urdu"
    if _ROMAN_URDU_RE.search(text.lower()):
        return "Roman Urdu"
    return "English"

# Normalize labels
def normalize_label(label):
    """Collapse a raw model label into "Positive", "Negative" or "Neutral".

    The label is lower-cased and searched for the substring "positive" first,
    then "negative"; a label containing neither is reported as "Neutral".
    """
    lowered = label.lower()
    for keyword, canonical in (("positive", "Positive"), ("negative", "Negative")):
        if keyword in lowered:
            return canonical
    return "Neutral"

# Add emojis for polarity
def add_emoji(label):
    """Return *label* followed by a space and its polarity emoji.

    Labels other than "Positive", "Negative" and "Neutral" have no emoji, so
    the result is the label followed by a single trailing space.
    """
    emoji_by_label = dict(Positive="😊", Negative="😞", Neutral="😐")
    emoji = emoji_by_label.get(label, "")
    return "{} {}".format(label, emoji)

# Prediction + Save sentence
def analyze_single(text, lang_hint):
    """Classify the sentiment of one sentence and log the sentence to SAVE_FILE.

    Args:
        text: The sentence entered by the user. ``None`` or whitespace-only
            input is rejected with a prompt message and nothing is saved.
        lang_hint: ``"English"``, ``"Urdu"`` or ``"Roman Urdu"``. ``"English"``
            is the UI default and is treated as "auto-detect" via
            ``detect_language``; any other value is used as given.

    Returns:
        A 4-tuple ``(sentiment, score, sentiment_with_emoji, SAVE_FILE)``:
        the normalized sentiment label, the model score rounded to 3 decimals
        as a string, the label with its emoji, and the CSV path for download.
        For blank input the tuple is
        ``("Please enter a sentence.", "", "", SAVE_FILE)``.
    """
    if not text or not text.strip():
        return "Please enter a sentence.", "", "", SAVE_FILE

    # Auto detect if user keeps default "English"
    lang = detect_language(text) if lang_hint == "English" else lang_hint

    if lang == "English":
        model = english_model
    elif lang == "Urdu":
        model = urdu_model
    else:
        model = roman_urdu_model

    # truncation=True keeps over-long input within the model's maximum
    # sequence length instead of failing inside the pipeline.
    result = model(text, truncation=True)[0]

    sentiment = normalize_label(result["label"])
    sentiment_with_emoji = add_emoji(sentiment)
    score = round(result["score"], 3)

    # Save only the sentence. Append a single row rather than reading and
    # rewriting the whole CSV: the read/write round trip re-parses earlier
    # rows (e.g. a stored "NA" or "null" comes back as NaN and is written out
    # blank) and its cost grows with every saved sentence.
    needs_header = (
        not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
    )
    pd.DataFrame([[text]], columns=["Sentence"]).to_csv(
        SAVE_FILE, mode="a", header=needs_header, index=False
    )

    return sentiment, str(score), sentiment_with_emoji, SAVE_FILE

# Gradio UI
with gr.Blocks() as demo:
    # Page header: title, supported languages and the models behind them.
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
        "**Languages:** English, Urdu, Roman Urdu  \n"
        "📌 Models:  \n"
        "- `siebert/sentiment-roberta-large-english (English)`  \n"
        "- `mrgmd01/SA_Model_bert-base-multilingual-uncased (Urdu & Roman Urdu)`"
    )

    with gr.Tab("Sentiment Analyzer"):
        # Inputs: the sentence and a language hint ("English" is the default).
        user_text = gr.Textbox(
            label="Enter text",
            placeholder="Type in English, Urdu, or Roman Urdu...",
        )
        lang_dropdown = gr.Dropdown(
            choices=["English", "Urdu", "Roman Urdu"],
            label="Language Hint",
            value="English",
        )
        btn = gr.Button("🔍 Analyze")

        # Outputs, declared in the order analyze_single returns its values.
        out_sent = gr.Textbox(label="Sentiment")
        out_conf = gr.Textbox(label="Confidence (0–1)")
        out_pol = gr.Textbox(label="Polarity")
        out_file = gr.File(label="⬇️ Download Sentences (.csv)", type="filepath")

        # Run the analysis when the button is pressed.
        btn.click(
            fn=analyze_single,
            inputs=[user_text, lang_dropdown],
            outputs=[out_sent, out_conf, out_pol, out_file],
        )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()