import gradio as gr
import joblib
import os
import re
import emoji
import demoji
import numpy as np
# ==========================================================
# 📦 Load all models
# ==========================================================
# English pipeline: TF-IDF features -> stacking ensemble classifier.
# The label encoder maps between encoded class indices and label names.
vectorizer_en = joblib.load("tfidf_vectorizer_en.pkl")
le_en = joblib.load("label_encoder_en.pkl")
stacking_en = joblib.load("stacking_en.pkl")
# Persian pipeline: same structure, trained on Persian text.
# NOTE(review): paths are relative — assumes the .pkl files sit next to
# this script (typical for a HF Space); confirm before relocating.
vectorizer_fa = joblib.load("tfidf_vectorizer_fa.pkl")
le_fa = joblib.load("label_encoder_fa.pkl")
stacking_fa = joblib.load("stacking_fa.pkl")
# ==========================================================
# 🧹 Text cleaning functions
# ==========================================================
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from hazm import Normalizer, Lemmatizer as HazmLemmatizer, word_tokenize as hazm_tokenize
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")
# English preprocess
# Shared, module-level resources for preprocess_english(); built once so
# they are not re-created on every request.
lemmatizer = WordNetLemmatizer()  # WordNet lemmatizer for English tokens
STOPWORDS = set(stopwords.words("english"))  # set for O(1) membership tests
RE_URL = re.compile(r"http\S+|www\.\S+")  # http(s) links and bare www.* URLs
RE_HTML = re.compile(r"<.*?>")  # HTML tags (non-greedy, single-line)
RE_NONALPHA = re.compile(r"[^a-zA-Z\s]")  # everything except ASCII letters/whitespace
def preprocess_english(text):
    """Normalize an English comment into a lemmatized, stopword-free string.

    Lowercases the text, converts emoji to text aliases and strips any
    remaining emoji, removes URLs, HTML tags and non-alphabetic
    characters, then lemmatizes the surviving tokens, dropping stopwords
    and tokens of two characters or fewer.
    """
    cleaned = str(text).lower()
    # Emoji pass: demojize first, then strip anything demoji still finds.
    cleaned = demoji.replace(emoji.demojize(cleaned), "")
    for pattern in (RE_URL, RE_HTML, RE_NONALPHA):
        cleaned = pattern.sub(" ", cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    kept = [
        lemmatizer.lemmatize(tok)
        for tok in word_tokenize(cleaned)
        if tok not in STOPWORDS and len(tok) > 2
    ]
    return " ".join(kept)
# Persian preprocess
# Shared, module-level resources for preprocess_persian().
normalizer = Normalizer()  # hazm normalizer (character/spacing normalization)
hazm_lemmatizer = HazmLemmatizer()  # hazm lemmatizer for Persian tokens
RE_URL_FA = re.compile(r"http\S+|www\.\S+")  # http(s) links and bare www.* URLs
RE_NONPERSIAN = re.compile(r"[^\u0600-\u06FFA-Za-z\s]")  # keep Arabic-block chars, Latin letters, whitespace
def preprocess_persian(text):
    """Normalize a Persian comment into a lemmatized token string.

    Applies hazm normalization, converts/strips emoji, removes URLs,
    mentions, hashtags, digits and characters outside the Persian/Latin
    ranges, then lemmatizes every token longer than one character.
    """
    cleaned = normalizer.normalize(str(text))
    # Emoji pass: demojize first, then strip anything demoji still finds.
    cleaned = demoji.replace(emoji.demojize(cleaned), "")
    cleaned = RE_URL_FA.sub(" ", cleaned)
    cleaned = re.sub(r"@\w+|#\w+|\d+", " ", cleaned)  # mentions, hashtags, digits
    cleaned = RE_NONPERSIAN.sub(" ", cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    lemmas = [
        hazm_lemmatizer.lemmatize(tok)
        for tok in hazm_tokenize(cleaned)
        if len(tok) > 1
    ]
    return " ".join(lemmas)
# ==========================================================
# 🔮 Prediction function
# ==========================================================
def predict_sentiment(comment, language):
    """Classify the sentiment of a comment in the selected language.

    Parameters
    ----------
    comment : str
        Raw user comment (English or Persian).
    language : str
        "English" selects the English pipeline; any other value falls
        back to the Persian one (mirrors the original if/else).

    Returns
    -------
    str
        Markdown text with the predicted label and per-class
        probabilities.
    """
    # Select the per-language (preprocessor, vectorizer, model, encoder)
    # bundle instead of duplicating the whole predict path per branch.
    if language == "English":
        clean_text = preprocess_english(comment)
        vectorizer, model, encoder = vectorizer_en, stacking_en, le_en
    else:
        clean_text = preprocess_persian(comment)
        vectorizer, model, encoder = vectorizer_fa, stacking_fa, le_fa

    X = vectorizer.transform([clean_text])
    pred = model.predict(X)[0]
    probs = model.predict_proba(X)[0]
    classes = encoder.classes_

    # BUG FIX: the label encoders were loaded but never applied to the
    # prediction, so a model trained on encoded labels displayed a bare
    # class index (e.g. "2") to the user. Decode integer predictions back
    # to their label; string-returning models pass through unchanged.
    if not isinstance(pred, str):
        try:
            pred = encoder.inverse_transform([pred])[0]
        except (ValueError, TypeError):
            pass  # best-effort: keep the raw prediction if decoding fails

    result_str = f"🔹 **Predicted Sentiment:** {pred}\n\n"
    prob_table = "\n".join(f"{cls}: {round(p, 3)}" for cls, p in zip(classes, probs))
    return f"🗣️ **Input:** {comment}\n\n{result_str}**Prediction Probabilities:**\n{prob_table}"
# ==========================================================
# 🎨 Gradio UI
# ==========================================================
# Components are built inline — each is used exactly once by the Interface.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=[
        gr.Textbox(label="Enter your comment here"),
        gr.Dropdown(["English", "Persian"], label="Select Language", value="English"),
    ],
    outputs=gr.Markdown(),
    title="🌍 Multilingual Sentiment Analyzer (English + Persian)",
    description="Enter a comment in English or Persian to see the predicted sentiment and probabilities.",
    examples=[
        ["I loved the show! It was amazing!", "English"],
        ["برنامه خیلی عالی بود و مجری هم خوب بود", "Persian"],
        ["It was an average episode, not too bad.", "English"],
    ],
)

# Launch only when run as a script (Spaces / local), not on import.
if __name__ == "__main__":
    iface.launch()