Spaces:
Sleeping
Sleeping
File size: 5,015 Bytes
ea7b9be 6743c3d cd458ad 6743c3d cd458ad fb958ae cd458ad 6743c3d cd458ad 6743c3d cd458ad f0abce9 6743c3d cd458ad 3840bbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re
# -----------------------------
# Load Models
# -----------------------------
# English text is scored by its own checkpoint.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Urdu-script and Roman Urdu text are both served by the same checkpoint.
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
)

# Reuse the already-loaded pipeline rather than loading the identical
# checkpoint a second time; the separate name is kept so existing callers
# that reference `roman_urdu_model` keep working.
roman_urdu_model = urdu_model
# -----------------------------
# CSV Setup
# -----------------------------
# Path of the CSV file that accumulates one row per analyzed sentence.
SAVE_FILE = "sentiment_logs.csv"

# On first run, seed the log with a header-only CSV so later reads succeed.
# "utf-8-sig" is the same encoding used when the log is read back and rewritten.
if not os.path.exists(SAVE_FILE):
    _empty_log = pd.DataFrame(
        columns=["Sentence", "Language", "Sentiment", "Confidence"]
    )
    _empty_log.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
# -----------------------------
# Language Detection (simple rule-based)
# -----------------------------
# Any letter written in Arabic script: the basic Arabic block letters
# (which include the Urdu-specific ones such as U+067E, U+0679, U+0686,
# U+06AF, U+06BE, U+06D2), the Arabic Supplement block, and the
# presentation forms. The last range stops at U+FEFC so that U+FEFF
# (byte-order mark) is never mistaken for Urdu text.
_URDU_SCRIPT_RE = re.compile(
    r"[\u0621-\u064A\u066E-\u06D3\u06D5\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFC]"
)

# Whole-word match against a small vocabulary of common Roman Urdu words.
# Compiled once at import time instead of on every call.
_ROMAN_URDU_RE = re.compile(
    r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad|pyar|dil|insaan|zindagi|khushi|gham|dost|kitab|school|college|teacher|parhna|samajh|likhna|khana|peena|soch|sapna|sabar|shukriya|khuda|allah|dua|mazak|thaka|thori|abhi|kal|aaj|kal|haan|na|acha|bura|mazedaar|mehsoos|mehnat|imtihan|class|talib|ilam|zuban|mushkil|asan)\b"
)


def detect_language(text):
    """Guess whether *text* is Urdu, Roman Urdu, or English.

    The check is rule-based and runs in this order:

    1. Any Arabic-script letter present -> "Urdu".
    2. Any word from the Roman Urdu vocabulary (case-insensitive,
       whole-word) -> "Roman Urdu".
    3. Otherwise -> "English".

    Args:
        text: The sentence to classify. Empty or ``None`` input is treated
            as having no Urdu markers.

    Returns:
        One of the strings "Urdu", "Roman Urdu", or "English".
    """
    if not text:
        return "English"
    if _URDU_SCRIPT_RE.search(text):
        return "Urdu"
    if _ROMAN_URDU_RE.search(text.lower()):
        return "Roman Urdu"
    return "English"
# -----------------------------
# Normalize Sentiment Labels
# -----------------------------
def normalize_label(label):
    """Collapse a raw model label into "Positive", "Negative", or "Neutral".

    Matching is case-insensitive and substring-based. The positive marker
    is tested first, so a label containing both markers maps to "Positive".
    Any label containing neither marker maps to "Neutral".
    """
    lowered = label.lower()
    # "positive" always contains "pos" (and "negative" contains "neg"), so a
    # single substring test per polarity covers both spellings.
    for marker, canonical in (("pos", "Positive"), ("neg", "Negative")):
        if marker in lowered:
            return canonical
    return "Neutral"
# -----------------------------
# Add Emojis + Tips
# -----------------------------
def sentiment_with_tips(sentiment):
    """Return the emoji-plus-tip message for a normalized sentiment.

    Recognized values are "Positive", "Negative", and "Neutral"; any other
    value yields an empty string.
    """
    message_for = dict(
        [
            ("Positive", "😊 Great! Keep spreading positivity."),
            ("Negative", "😞 It seems negative. Try to focus on solutions."),
            ("Neutral", "😐 Neutral statement. Could go either way."),
        ]
    )
    if sentiment in message_for:
        return message_for[sentiment]
    return ""
# -----------------------------
# Main Sentiment Function
# -----------------------------
def analyze_sentiment(text, lang_hint):
    """Classify the sentiment of *text* and append the result to the CSV log.

    Args:
        text: The sentence entered by the user.
        lang_hint: "English", "Urdu", "Roman Urdu", or "Auto Detect". An
            empty/None hint is treated like "Auto Detect", in which case the
            language is guessed with ``detect_language``.

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)``:
        the normalized sentiment label, the model score rounded to three
        decimals as a string, the tip text, and the path of the CSV log.
        For blank input or on any failure the first element is a warning
        message and the next two are empty strings.
    """
    log_columns = ["Sentence", "Language", "Sentiment", "Confidence"]
    try:
        # `not text` guards against None as well as the empty string.
        if not text or not text.strip():
            return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

        # Fall back to rule-based detection when no explicit language is set.
        if not lang_hint or lang_hint == "Auto Detect":
            lang = detect_language(text)
        else:
            lang = lang_hint

        # Select model; anything that is not English or Urdu goes to the
        # Roman Urdu pipeline, as before.
        if lang == "English":
            model = english_model
        elif lang == "Urdu":
            model = urdu_model
        else:
            model = roman_urdu_model

        # truncation=True is forwarded to the tokenizer so that input longer
        # than the model's maximum sequence length is cut instead of raising.
        result = model(text, truncation=True)[0]

        # Process results
        sentiment = normalize_label(result["label"])
        score = round(float(result["score"]), 3)
        explanation = sentiment_with_tips(sentiment)

        # Save to CSV (UTF-8 safe). Only a missing, empty, or unparsable log
        # is replaced by a fresh one; any other read failure (for example a
        # permissions problem) is reported instead of overwriting the log.
        try:
            df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
        except (
            FileNotFoundError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
            UnicodeDecodeError,
        ):
            df = pd.DataFrame(columns=log_columns)

        new_row = pd.DataFrame(
            [[text, lang, sentiment, score]], columns=log_columns
        )
        df = pd.concat([df, new_row], ignore_index=True)
        df.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")

        return sentiment, str(score), explanation, SAVE_FILE
    except Exception as e:
        # UI boundary: surface the problem in the output box rather than
        # letting the request fail.
        return f"⚠️ Error: {str(e)}", "", "", SAVE_FILE
# -----------------------------
# Gradio UI
# -----------------------------
# Layout: a header, then one row with an input column (text, language, button)
# and an output column (sentiment, confidence, explanation, log download).
with gr.Blocks() as demo:
    gr.Markdown(
        value=(
            "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
            "Detect sentiment as **Positive, Neutral, or Negative** with confidence score.\n\n"
            "📌 Features:\n"
            "- Choose language (or Auto Detect)\n"
            "- Download all results as CSV\n"
            "- Emojis + Tips for better understanding 🎯"
        )
    )

    with gr.Row():
        # Input side.
        with gr.Column():
            user_text = gr.Textbox(
                label="✍️ Enter text",
                placeholder="Type in English, Urdu, or Roman Urdu...",
            )
            lang_dropdown = gr.Dropdown(
                choices=["Auto Detect", "English", "Urdu", "Roman Urdu"],
                label="🌐 Language",
                value="Auto Detect",
            )
            btn = gr.Button(value="🔍 Analyze")

        # Output side.
        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

    # The four return values of analyze_sentiment map onto the four outputs
    # in order.
    btn.click(
        fn=analyze_sentiment,
        inputs=[user_text, lang_dropdown],
        outputs=[out_sent, out_conf, out_exp, out_file],
    )

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|