tarneemalaa committed on
Commit
21f3981
·
verified ·
1 Parent(s): 86766b1

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +114 -0
  2. models.zip +3 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import BertTokenizer, BertForSequenceClassification
4
+ import joblib
5
+ import numpy as np
6
+
7
# ---------------------------------------------------------------------------
# Classical pipeline: artifacts deserialized with joblib from ./models
# ---------------------------------------------------------------------------

# Vectorizer whose transform() output is fed to the classical models below.
tfidf_vectorizer = joblib.load("models/tfidf_vectorizer.pkl")

# One estimator per classical option offered in the UI.
lr_model = joblib.load("models/logistic_regression_tfidf.pkl")
svm_model = joblib.load("models/svm_tfidf_model.pkl")
nb_model = joblib.load("models/nb_tfidf_model.pkl")
rf_model = joblib.load("models/rf_tfidf_model.pkl")

# ---------------------------------------------------------------------------
# Transformer pipeline: fine-tuned BERT model and its tokenizer
# ---------------------------------------------------------------------------

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model_name = "tarneemalaa/bert_imdb_model"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Move the weights to the selected device and switch to evaluation mode.
model.to(device)
model.eval()
24
+
25
+ # Prediction function
26
# Label that routes a request to the fine-tuned BERT model.
_BERT_CHOICE = "BERT (Fine-tuned)"

# Every model label predict_sentiment knows how to serve.
_SUPPORTED_MODELS = (
    _BERT_CHOICE,
    "Logistic Regression",
    "SVM",
    "Naive Bayes",
    "Random Forest",
)


def predict_sentiment(model_picked, text, max_len=256):
    """Classify a review as positive or negative and render the result as HTML.

    Args:
        model_picked: Label of the model to run; expected to be one of the
            entries in ``_SUPPORTED_MODELS``.
        text: Review text to classify.
        max_len: ``max_length`` handed to the BERT tokenizer (the input is
            truncated / padded to this many tokens). Ignored by the
            classical models.

    Returns:
        An HTML snippet showing the predicted sentiment and a confidence
        value, or a plain message string when ``text`` is blank or
        ``model_picked`` is not a supported label.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze"

    # Validate the label before doing any work. Previously an unrecognised
    # label (or None) matched no branch, left pred_label unassigned and
    # crashed with UnboundLocalError when the result was rendered.
    if model_picked not in _SUPPORTED_MODELS:
        return "Please choose one of the available models"

    if model_picked == _BERT_CHOICE:
        inputs = tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=max_len,
            return_tensors='pt',
        )
        input_ids = inputs['input_ids'].to(device)
        attention_mask = inputs['attention_mask'].to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            output = model(input_ids=input_ids, attention_mask=attention_mask)
            probs = torch.softmax(output.logits, dim=1)

        pred_label = torch.argmax(probs, dim=1).item()
        confidence_display = f"{probs[0][pred_label].item():.2%}"
    else:
        vectorized = tfidf_vectorizer.transform([text])

        if model_picked == "SVM":
            # Only predict() is used for the SVM, so no probability is shown.
            pred_label = int(svm_model.predict(vectorized)[0])
            confidence_display = "<i>Not available for SVM</i>"
        else:
            # The remaining classical models share the same predict_proba flow.
            proba_models = {
                "Logistic Regression": lr_model,
                "Naive Bayes": nb_model,
                "Random Forest": rf_model,
            }
            probs = proba_models[model_picked].predict_proba(vectorized)[0]
            # NOTE(review): the argmax index is used directly as the label,
            # which assumes the estimator's classes are ordered [0, 1] - confirm.
            pred_label = int(np.argmax(probs))
            confidence_display = f"{probs[pred_label]:.2%}"

    # Label 1 is rendered as positive; any other label as negative.
    sentiment = "Positive" if pred_label == 1 else "Negative"
    emoji = "✅" if sentiment == "Positive" else "❌"
    color = "green" if sentiment == "Positive" else "red"

    return f"""
    <div style="font-size: 24px; font-weight: bold; color: {color}; margin-bottom: 10px;">
        {emoji} Sentiment: {sentiment}
    </div>
    <div style="font-size: 18px; color: #666;">
        Confidence: {confidence_display}
    </div>
    """
83
+
84
# Input components, built up front so the Interface call below stays short.
# The dropdown choices are the labels predict_sentiment compares against.
model_dropdown = gr.Dropdown(
    choices=[
        "BERT (Fine-tuned)",
        "Logistic Regression",
        "SVM",
        "Naive Bayes",
        "Random Forest"
    ],
    label="Choose Model",
    value="BERT (Fine-tuned)"
)
review_box = gr.Textbox(
    lines=6,
    placeholder="Paste a movie review here...",
    label="🎬 Movie Review"
)

# Each example row is [model label, review text], matching the input order.
example_rows = [
    ["BERT (Fine-tuned)", "This movie was absolutely amazing, I enjoyed every moment of it!"],
    ["Logistic Regression", "It was a total waste of time. The plot made no sense."],
    ["SVM", "Great acting and wonderful storyline. Highly recommend!"],
    ["Naive Bayes", "Boring and predictable. Not worth watching."]
]

# Web UI: (model, review) -> HTML produced by predict_sentiment.
demo = gr.Interface(
    fn=predict_sentiment,
    inputs=[model_dropdown, review_box],
    outputs=gr.HTML(label="Prediction Result"),
    title="🎬 IMDb Sentiment Classifier",
    description="This app allows you to **compare** a **fine-tuned BERT** model with **classical ML models** (Logistic Regression, SVM, Naive Bayes, Random Forest) on IMDb movie reviews.\n\nMade by [Tarneem Alaa](https://github.com/tarneemalaa1)",
    theme=gr.themes.Soft(),
    examples=example_rows,
    flagging_mode="never"
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
models.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a5329a92001be3b64101dd58b924ed9d121497003047bbda75a9774bd6c2446
3
+ size 18738202
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ scikit-learn
5
+ numpy
6
+ joblib