File size: 3,519 Bytes
b445cfc c3ed96e b445cfc 03b573d b445cfc 03b573d b445cfc c3ed96e 03b573d c3ed96e b445cfc c3ed96e b445cfc c3ed96e b445cfc c3ed96e b445cfc c3ed96e b445cfc c3ed96e b445cfc 0f459d4 21a70c6 0f459d4 5e58202 b445cfc 5e58202 b445cfc 0f459d4 b445cfc c3ed96e b445cfc c3ed96e b445cfc 5e58202 c3ed96e b445cfc 0f459d4 b445cfc 0f459d4 c3ed96e b445cfc 0f459d4 c3ed96e 0f459d4 047160a 0f459d4 493a4dc 0f459d4 493a4dc 21a70c6 0f459d4 493a4dc 5e58202 b445cfc 5e58202 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re
# Load models
# Hugging Face pipelines download/cache their weights at import time; each
# call returns a list of {"label": ..., "score": ...} dicts.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english"
)
urdu_model = pipeline(
    "sentiment-analysis",
    model="mrgmd01/SA_Model_bert-base-multilingual-uncased"
)
# NOTE(review): same checkpoint as urdu_model — presumably a multilingual
# stand-in until a dedicated Roman-Urdu model exists.
roman_urdu_model = pipeline(
    "sentiment-analysis",
    model="mrgmd01/SA_Model_bert-base-multilingual-uncased" # replace with roman urdu model if available
)
# File to store only sentences
SAVE_FILE = "sentences.csv"
# Initialize file if not exists
# Creates the CSV with a single "Sentence" header column so later appends
# in analyze_single have a file to write into.
if not os.path.exists(SAVE_FILE):
    df = pd.DataFrame(columns=["Sentence"])
    df.to_csv(SAVE_FILE, index=False)
# Detect language: English / Urdu / Roman Urdu
def detect_language(text):
    """Heuristically classify *text* as "Urdu", "Roman Urdu", or "English".

    Priority order: any Arabic-script character -> "Urdu"; else a common
    Roman-Urdu keyword -> "Roman Urdu"; otherwise defaults to "English".
    """
    # Test against the Arabic and Arabic Supplement Unicode blocks instead of
    # a hand-listed alphabet: the original list missed common Urdu-specific
    # letters (e.g. ٹ ڈ ڑ ں ے پ چ گ ژ ھ), so words built from them were
    # misclassified as English.
    if any("\u0600" <= ch <= "\u06FF" or "\u0750" <= ch <= "\u077F" for ch in text):
        return "Urdu"
    # Whole-word match against a small list of frequent Roman-Urdu tokens.
    roman_urdu_pattern = r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad)\b"
    if re.search(roman_urdu_pattern, text.lower()):
        return "Roman Urdu"
    return "English"
# Normalize labels
def normalize_label(label):
    """Map a raw model label onto "Positive", "Negative", or "Neutral".

    Matching is case-insensitive and substring-based, so labels such as
    "LABEL_positive" or "NEGATIVE" normalize correctly; anything else
    falls back to "Neutral".
    """
    lowered = label.lower()
    for keyword, canonical in (("positive", "Positive"), ("negative", "Negative")):
        if keyword in lowered:
            return canonical
    return "Neutral"
# Add emojis for polarity
def add_emoji(label):
    """Return *label* followed by its polarity emoji (empty for unknown labels)."""
    if label == "Positive":
        suffix = "😊"
    elif label == "Negative":
        suffix = "😞"
    elif label == "Neutral":
        suffix = "😐"
    else:
        # Unknown labels keep the trailing space with no emoji, matching
        # the original dict.get(..., '') behavior.
        suffix = ""
    return f"{label} {suffix}"
# Prediction + Save sentence
def analyze_single(text, lang_hint):
    """Classify *text*, log the sentence to SAVE_FILE, and return UI values.

    Args:
        text: the sentence typed by the user.
        lang_hint: dropdown value; "English" (the default) triggers
            auto-detection, any other value is trusted as-is.

    Returns:
        A 4-tuple of (sentiment label, confidence as a string,
        label with emoji, path to the saved-sentences CSV).
    """
    if not text.strip():
        return "Please enter a sentence.", "", "", SAVE_FILE
    # Auto detect if user keeps default "English"
    lang = detect_language(text) if lang_hint == "English" else lang_hint
    # Route to the per-language pipeline; Roman Urdu is the fallback branch.
    if lang == "English":
        result = english_model(text)[0]
    elif lang == "Urdu":
        result = urdu_model(text)[0]
    else:
        result = roman_urdu_model(text)[0]
    sentiment = normalize_label(result["label"])
    sentiment_with_emoji = add_emoji(sentiment)
    score = round(result["score"], 3)
    # Append the sentence directly instead of re-reading the whole CSV per
    # request (the original loaded the full file each time: O(n) per call and
    # race-prone under concurrent clicks). Recreate the header if the file
    # was deleted while the app is running.
    write_header = not os.path.exists(SAVE_FILE)
    pd.DataFrame([[text]], columns=["Sentence"]).to_csv(
        SAVE_FILE, mode="a", header=write_header, index=False
    )
    return sentiment, str(score), sentiment_with_emoji, SAVE_FILE
# Gradio UI
# Builds the Blocks layout and wires the Analyze button to analyze_single.
with gr.Blocks() as demo:
    # Header with model attribution shown at the top of the app.
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
        "**Languages:** English, Urdu, Roman Urdu \n"
        "📌 Models: \n"
        "- `siebert/sentiment-roberta-large-english (English)` \n"
        "- `mrgmd01/SA_Model_bert-base-multilingual-uncased (Urdu & Roman Urdu)`"
    )
    with gr.Tab("Sentiment Analyzer"):
        user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
        # The "English" default doubles as the auto-detect trigger in
        # analyze_single — picking another language bypasses detection.
        lang_dropdown = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English")
        btn = gr.Button("🔍 Analyze")
        # Output widgets mirror analyze_single's 4-tuple return, in order.
        out_sent = gr.Textbox(label="Sentiment")
        out_conf = gr.Textbox(label="Confidence (0–1)")
        out_pol = gr.Textbox(label="Polarity")
        out_file = gr.File(label="⬇️ Download Sentences (.csv)", type="filepath")
        btn.click(analyze_single, inputs=[user_text, lang_dropdown],
                  outputs=[out_sent, out_conf, out_pol, out_file])

if __name__ == "__main__":
    demo.launch()
|