File size: 5,015 Bytes
ea7b9be
6743c3d
cd458ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6743c3d
 
 
cd458ad
 
 
 
 
 
 
 
fb958ae
cd458ad
 
 
 
 
 
 
 
 
6743c3d
cd458ad
6743c3d
cd458ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0abce9
6743c3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd458ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3840bbb
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re

# -----------------------------
# Load Models
# -----------------------------
# English-only sentiment classifier.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Urdu and Roman Urdu are served by the very same checkpoint, so build the
# pipeline once and share it. Constructing it twice loaded two identical
# copies of the weights and doubled the start-up cost for no benefit.
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
)

# Kept as a separate name so existing references to `roman_urdu_model` work.
roman_urdu_model = urdu_model

# -----------------------------
# CSV Setup
# -----------------------------
# Path of the CSV file that collects every analysed sentence.
SAVE_FILE = "sentiment_logs.csv"

# On first run, seed the log with a header-only CSV so that later reads and
# downloads always find a file with the expected columns.
if not os.path.exists(SAVE_FILE):
    _header_only = pd.DataFrame(
        columns=["Sentence", "Language", "Sentiment", "Confidence"]
    )
    _header_only.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")

# -----------------------------
# Language Detection (simple rule-based)
# -----------------------------
# Any character from the Unicode blocks used for Arabic-script text (Arabic,
# Arabic Supplement, Arabic Presentation Forms-A and -B). A hand-typed letter
# list is easy to get incomplete: the previous one lacked common Urdu letters
# such as pe, che, tte, ddal, rre, gaf, noon ghunna and bari ye, so words made
# only of those letters were reported as English.
_URDU_SCRIPT_RE = re.compile(
    r"[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFF]"
)

# Marker words whose presence (as whole words) flags Latin-script text as
# Roman Urdu.
# NOTE(review): "school", "college", "teacher" and "class" are ordinary
# English words too, so plain English sentences containing them are routed
# to Roman Urdu. Kept as-is to preserve existing routing; confirm intent.
_ROMAN_URDU_WORDS = (
    "hai", "kia", "kyun", "nahi", "bohot", "acha", "galat", "sahi", "parhai",
    "ustad", "pyar", "dil", "insaan", "zindagi", "khushi", "gham", "dost",
    "kitab", "school", "college", "teacher", "parhna", "samajh", "likhna",
    "khana", "peena", "soch", "sapna", "sabar", "shukriya", "khuda", "allah",
    "dua", "mazak", "thaka", "thori", "abhi", "kal", "aaj", "haan", "na",
    "bura", "mazedaar", "mehsoos", "mehnat", "imtihan", "class", "talib",
    "ilam", "zuban", "mushkil", "asan",
)
_ROMAN_URDU_RE = re.compile(r"\b(?:" + "|".join(_ROMAN_URDU_WORDS) + r")\b")


def detect_language(text):
    """Guess the language of *text* with simple rules.

    Args:
        text: The sentence to classify.

    Returns:
        "Urdu" if the text contains at least one Arabic-script character,
        otherwise "Roman Urdu" if it contains one of the known Roman Urdu
        marker words (case-insensitive, whole-word match), otherwise
        "English". An empty string therefore yields "English".
    """
    # Script check comes first: a single Arabic-script character is decisive
    # even when the sentence mixes in Latin words.
    if _URDU_SCRIPT_RE.search(text):
        return "Urdu"
    if _ROMAN_URDU_RE.search(text.lower()):
        return "Roman Urdu"
    return "English"

# -----------------------------
# Normalize Sentiment Labels
# -----------------------------
def normalize_label(label):
    """Map a raw model label onto "Positive", "Negative" or "Neutral".

    Matching is case-insensitive and substring-based: a label containing
    "pos" is Positive, otherwise one containing "neg" is Negative, and
    anything else is reported as Neutral.
    """
    lowered = label.lower()
    # Order matters: "pos" is tested before "neg", first hit wins.
    for marker, canonical in (("pos", "Positive"), ("neg", "Negative")):
        if marker in lowered:
            return canonical
    return "Neutral"

# -----------------------------
# Add Emojis + Tips
# -----------------------------
def sentiment_with_tips(sentiment):
    """Return the emoji-prefixed tip for a normalized sentiment.

    Known sentiments are "Positive", "Negative" and "Neutral"; any other
    value produces an empty string.
    """
    messages = dict(
        [
            ("Positive", "😊 Great! Keep spreading positivity."),
            ("Negative", "😞 It seems negative. Try to focus on solutions."),
            ("Neutral", "😐 Neutral statement. Could go either way."),
        ]
    )
    try:
        return messages[sentiment]
    except KeyError:
        # Unknown sentiment: no tip to show.
        return ""

# -----------------------------
# Main Sentiment Function
# -----------------------------
# Column order of the CSV log written by analyze_sentiment.
_LOG_COLUMNS = ["Sentence", "Language", "Sentiment", "Confidence"]


def _append_log_row(text, lang, sentiment, score):
    """Append one analysed sentence as a new row of the CSV at SAVE_FILE."""
    # Write the header only when starting a fresh (missing or empty) file.
    needs_header = (
        not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
    )
    row = pd.DataFrame([[text, lang, sentiment, score]], columns=_LOG_COLUMNS)
    # Append rather than read-concat-rewrite the whole log:
    #   * cost per request no longer grows with the size of the log;
    #   * earlier rows are never re-parsed, so sentences such as "NA" or
    #     "null" are not turned into blanks by a CSV round-trip;
    #   * an unreadable log is no longer silently replaced by an empty one.
    row.to_csv(
        SAVE_FILE,
        mode="a",
        header=needs_header,
        index=False,
        encoding="utf-8-sig",
    )


def analyze_sentiment(text, lang_hint):
    """Classify the sentiment of *text* and record the result in the CSV log.

    Args:
        text: Sentence entered by the user. ``None``, empty or
            whitespace-only input is rejected with a prompt message.
        lang_hint: "English", "Urdu" or "Roman Urdu" to force a model, or
            "Auto Detect" (also ``None`` / empty) to let detect_language
            choose.

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)``.
        ``confidence`` is the score rounded to 3 decimals, as a string.
        On invalid input or any failure the first element carries a
        "⚠️ ..." message and the next two are empty strings; ``log_path``
        is always SAVE_FILE.
    """
    try:
        if not text or not text.strip():
            return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

        # A cleared dropdown arrives as None/empty; treat it like
        # "Auto Detect" instead of silently using the fallback model and
        # logging a missing language.
        if lang_hint in (None, "", "Auto Detect"):
            lang = detect_language(text)
        else:
            lang = lang_hint

        # Pick the model for the language; anything that is neither English
        # nor Urdu is handled by the Roman Urdu model.
        if lang == "English":
            model = english_model
        elif lang == "Urdu":
            model = urdu_model
        else:
            model = roman_urdu_model
        result = model(text)[0]

        # Turn the raw prediction into display values.
        sentiment = normalize_label(result["label"])
        score = round(float(result["score"]), 3)
        explanation = sentiment_with_tips(sentiment)

        _append_log_row(text, lang, sentiment, score)

        return sentiment, str(score), explanation, SAVE_FILE

    except Exception as e:
        # UI boundary: surface the failure in the output box rather than
        # letting the handler raise.
        return f"⚠️ Error: {e}", "", "", SAVE_FILE

# -----------------------------
# Gradio UI
# -----------------------------
# Page header shown at the top of the app.
_INTRO_MARKDOWN = (
    "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
    "Detect sentiment as **Positive, Neutral, or Negative** with confidence score.\n\n"
    "📌 Features:\n"
    "- Choose language (or Auto Detect)\n"
    "- Download all results as CSV\n"
    "- Emojis + Tips for better understanding 🎯"
)

# Entries offered by the language dropdown; the first one is the default.
_LANGUAGE_CHOICES = ["Auto Detect", "English", "Urdu", "Roman Urdu"]

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            user_text = gr.Textbox(
                label="✍️ Enter text",
                placeholder="Type in English, Urdu, or Roman Urdu...",
            )
            lang_dropdown = gr.Dropdown(
                choices=_LANGUAGE_CHOICES,
                label="🌐 Language",
                value=_LANGUAGE_CHOICES[0],
            )
            btn = gr.Button("🔍 Analyze")

        # Right column: analysis results and the downloadable log.
        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

    # Clicking the button runs the analysis and fills all four outputs.
    btn.click(
        fn=analyze_sentiment,
        inputs=[user_text, lang_dropdown],
        outputs=[out_sent, out_conf, out_exp, out_file],
    )

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()