jeevitha-app committed on
Commit
5cf7d88
·
verified ·
1 Parent(s): dca10b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -69
app.py CHANGED
@@ -1,83 +1,89 @@
 
 
 
 
1
  import gradio as gr
2
  import joblib
3
- import re
4
  import numpy as np
 
 
5
 
6
- # ==========================================================
7
- # 🔹 1️⃣ Load Models and Vectorizers
8
- # ==========================================================
9
- english_model = joblib.load("logistic_regression_english.pkl")
10
- english_vectorizer = joblib.load("tfidf_vectorizer_english.pkl")
11
-
12
- persian_model = joblib.load("logistic_regression_persian.pkl")
13
- persian_vectorizer = joblib.load("tfidf_vectorizer_persian.pkl")
14
 
15
- # Label mapping from training
16
- label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
 
 
17
 
18
- # ==========================================================
19
- # 🔹 2️⃣ Preprocessing (must match training exactly)
20
- # ==========================================================
21
- def clean_english_text(text):
22
- text = text.lower()
23
- text = re.sub(r"http\S+|www\S+|https\S+", "", text)
24
- text = re.sub(r"[^a-zA-Z\s]", "", text)
25
- text = re.sub(r"\s+", " ", text).strip()
26
- return text
27
 
28
- def clean_persian_text(text):
29
- text = re.sub(r"[^\u0600-\u06FF\s]", "", text) # keep only Persian chars
30
- text = re.sub(r"\s+", " ", text).strip()
31
- return text
32
 
33
- # ==========================================================
34
- # 🔹 3️⃣ Prediction Function
35
- # ==========================================================
36
- def predict_sentiment(text, language):
37
  if not text.strip():
38
- return " Please enter some text."
39
-
40
- if language == "English":
41
- cleaned = clean_english_text(text)
42
- vec = english_vectorizer.transform([cleaned])
43
- probs = english_model.predict_proba(vec)[0]
44
- pred = np.argmax(probs)
45
- return f"Prediction: {label_map[pred]} ({probs[pred]:.2f} confidence)"
46
-
47
- elif language == "Persian":
48
- cleaned = clean_persian_text(text)
49
- vec = persian_vectorizer.transform([cleaned])
50
- probs = persian_model.predict_proba(vec)[0]
51
- pred = np.argmax(probs)
52
- return f"Prediction: {label_map[pred]} ({probs[pred]:.2f} confidence)"
53
 
 
 
 
 
54
  else:
55
- return "❌ Invalid language option selected."
 
56
 
57
- # ==========================================================
58
- # 🔹 4️⃣ Debug Info (Optional - Check Vocabulary Size)
59
- # ==========================================================
60
- print(f"✅ English vectorizer vocabulary size: {len(english_vectorizer.get_feature_names_out())}")
61
- print(f"✅ Persian vectorizer vocabulary size: {len(persian_vectorizer.get_feature_names_out())}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # ==========================================================
64
- # 🔹 5️⃣ Gradio Interface
65
- # ==========================================================
66
- iface = gr.Interface(
67
- fn=predict_sentiment,
68
- inputs=[
69
- gr.Textbox(lines=3, label="Enter Text"),
70
- gr.Radio(["English", "Persian"], label="Select Language", value="English")
71
- ],
72
- outputs=gr.Textbox(label="Predicted Sentiment"),
73
- title="🌍 Multilingual Sentiment Classifier (English & Persian)",
74
- description="Choose your language and get sentiment prediction with confidence score.",
75
- examples=[
76
- ["This movie was amazing!", "English"],
77
- ["The worst experience ever", "English"],
78
- ["این فیلم خیلی بد بود", "Persian"],
79
- ["من این محصول را دوست دارم", "Persian"]
80
- ]
81
- )
82
 
83
- iface.launch()
 
 
 
 
 
1
+ # =====================================================
2
+ # 🌐 Multi-lingual Sentiment Analyzer (English + Persian)
3
+ # =====================================================
4
+
5
  import gradio as gr
6
  import joblib
 
7
  import numpy as np
8
+ import shap
9
+ import os
10
 
11
+ # -----------------------------------------------------
12
+ # Load Models and Vectorizers
13
+ # -----------------------------------------------------
14
+ MODEL_DIR = "models"
 
 
 
 
15
 
16
+ eng_model_path = os.path.join(MODEL_DIR, "logistic_regression_english.pkl")
17
+ eng_vectorizer_path = os.path.join(MODEL_DIR, "tfidf_vectorizer_english.pkl")
18
+ per_model_path = os.path.join(MODEL_DIR, "logistic_regression_persian.pkl")
19
+ per_vectorizer_path = os.path.join(MODEL_DIR, "tfidf_vectorizer_persian.pkl")
20
 
21
+ eng_model = joblib.load(eng_model_path)
22
+ eng_vectorizer = joblib.load(eng_vectorizer_path)
23
+ per_model = joblib.load(per_model_path)
24
+ per_vectorizer = joblib.load(per_vectorizer_path)
 
 
 
 
 
25
 
26
+ # -----------------------------------------------------
27
+ # Label mapping
28
+ # -----------------------------------------------------
29
+ label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
30
 
31
+ # -----------------------------------------------------
32
+ # Prediction Function
33
+ # -----------------------------------------------------
34
+ def predict_sentiment(text, lang):
35
  if not text.strip():
36
+ return "⚠️ Please enter some text.", "", "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # Select appropriate model and vectorizer
39
+ if lang == "English":
40
+ vectorizer = eng_vectorizer
41
+ model = eng_model
42
  else:
43
+ vectorizer = per_vectorizer
44
+ model = per_model
45
 
46
+ # Vectorize text
47
+ X = vectorizer.transform([text])
48
+ pred = model.predict(X)[0]
49
+ probs = model.predict_proba(X)[0]
50
+ conf = np.max(probs)
51
+ sentiment = label_map.get(pred, "Unknown")
52
+
53
+ # SHAP explanation
54
+ try:
55
+ explainer = shap.LinearExplainer(model, X, feature_dependence="independent")
56
+ shap_values = explainer.shap_values(X)
57
+ shap_html = shap.plots.text(explainer, X, display=False)
58
+ except Exception:
59
+ shap_html = "<p>⚠️ No explanation available for this input.</p>"
60
+
61
+ return f"Prediction: {sentiment}", f"Confidence: {conf:.3f}", shap_html, ""
62
+
63
+ # -----------------------------------------------------
64
+ # ✅ Build Gradio Interface
65
+ # -----------------------------------------------------
66
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
67
+ gr.Markdown("<h2 style='text-align:center;'>🌍 Multi-lingual Sentiment (English + Persian)</h2>")
68
+
69
+ with gr.Row():
70
+ comment = gr.Textbox(label="Comment", placeholder="Type your comment here...")
71
+ lang = gr.Radio(["English", "Persian"], label="Language", value="English")
72
+
73
+ predict_btn = gr.Button("Predict", variant="primary")
74
+
75
+ output1 = gr.Textbox(label="Prediction")
76
+ output2 = gr.Textbox(label="Confidence")
77
+ output3 = gr.HTML(label="Explanation")
78
 
79
+ predict_btn.click(
80
+ predict_sentiment,
81
+ inputs=[comment, lang],
82
+ outputs=[output1, output2, output3, gr.Textbox(visible=False)]
83
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
+ # -----------------------------------------------------
86
+ # ✅ Launch App
87
+ # -----------------------------------------------------
88
+ if __name__ == "__main__":
89
+ app.launch()