zafirabdullah12 commited on
Commit
0c6f77b
·
verified ·
1 Parent(s): 18c8128

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +254 -38
app.py CHANGED
@@ -8,64 +8,280 @@ import pickle
8
  import string
9
  import numpy as np
10
  import gradio as gr
11
-
12
- from nltk.corpus import stopwords
13
  from keras.models import load_model
14
  from keras.preprocessing.sequence import pad_sequences
15
 
16
- nltk.download('stopwords')
17
 
18
- # Load Model and Tokenizer
19
- model = load_model("sentiment_analysis_best.keras")
 
 
 
 
20
 
21
  with open("tokenizer.pkl", "rb") as f:
22
  tokenizer = pickle.load(f)
23
 
24
- # Maximum sequence length used for padding and truncation
25
- MAX_LEN = 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Stopwords list with negation preserved
28
- negations = {"not", "no", "nor", "never", "n't"}
29
- stop_words = set(stopwords.words("english")) - negations
 
30
 
31
  # Preprocessing Function
32
  def preprocess(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  text = text.lower()
 
 
 
 
 
34
  text = re.sub(r"\d+", "", text)
 
 
35
  text = text.translate(str.maketrans('', '', string.punctuation))
36
- words = [w for w in text.split() if w not in stop_words]
 
 
 
37
  return " ".join(words)
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Prediction Function
40
  def predict_sentiment(text):
41
- text = preprocess(text)
42
- seq = tokenizer.texts_to_sequences([text])
43
- pad = pad_sequences(seq, maxlen=MAX_LEN, padding='post')
44
- pred = model.predict(pad)
45
-
 
 
 
 
 
 
 
 
 
 
 
 
46
  label_idx = np.argmax(pred, axis=1)[0]
47
- confidence = pred[0][label_idx] * 100
48
-
49
- labels = ["Negative", "Positive", "Neutral"]
50
-
51
- return f"{labels[label_idx]} {confidence:.2f}%"
52
-
53
- # Gradio Interface
54
- interface = gr.Interface(
55
- fn=predict_sentiment,
56
- inputs=gr.Textbox(
57
- lines=4,
58
- placeholder="Type your sentence here...",
59
- label="Text"
60
- ),
61
- outputs=gr.Textbox(label="Prediction"),
62
- title="Sentiment Analysis Application",
63
- description="Sentiment Analysis Classification using a LSTM DL Model.",
64
- examples=[
65
- ["I really love this product"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  ["This is the worst experience ever"],
67
- ["It is okay, not good not bad"]
 
 
 
 
 
 
 
 
 
68
  ]
69
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- interface.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import string
9
  import numpy as np
10
  import gradio as gr
 
 
11
  from keras.models import load_model
12
  from keras.preprocessing.sequence import pad_sequences
13
 
 
14
 
15
# Constants
MAX_LEN = 100  # maximum token sequence length used for padding/truncation
MODEL_PATH = "sentiment_analysis_best.keras"
TOKENIZER_PATH = "tokenizer.pkl"

# Download the NLTK stopword corpus (no-op if it is already cached).
nltk.download('stopwords')

# NOTE(review): this module-level load is redundant — load_resources() below
# reloads the tokenizer (and the model) and rebinds the globals. Kept for
# backward compatibility, but now uses the TOKENIZER_PATH constant instead of
# repeating the hard-coded "tokenizer.pkl" literal.
with open(TOKENIZER_PATH, "rb") as f:
    tokenizer = pickle.load(f)
24
 
25
# Mapping of common English contractions to their expanded forms.
# Module-level so it is built once, not on every call.
_CONTRACTIONS = {
    "i'm": "i am", "you're": "you are", "he's": "he is",
    "she's": "she is", "it's": "it is", "we're": "we are",
    "they're": "they are", "i've": "i have", "you've": "you have",
    "we've": "we have", "they've": "they have", "i'll": "i will",
    "you'll": "you will", "he'll": "he will", "she'll": "she will",
    "we'll": "we will", "they'll": "they will", "i'd": "i would",
    "you'd": "you would", "he'd": "he would", "she'd": "she would",
    "we'd": "we would", "they'd": "they would", "don't": "do not",
    "doesn't": "does not", "didn't": "did not", "can't": "cannot",
    "couldn't": "could not", "won't": "will not", "wouldn't": "would not",
    "shouldn't": "should not", "isn't": "is not", "aren't": "are not",
    "wasn't": "was not", "weren't": "were not", "hasn't": "has not",
    "haven't": "have not", "hadn't": "had not", "mightn't": "might not",
    "mustn't": "must not", "needn't": "need not", "shan't": "shall not"
}

# Single precompiled alternation, longest keys first so a longer contraction
# is never shadowed by a shorter one. One pass replaces the original ~40
# separate re.sub calls (which also rebuilt the dict on every invocation).
_CONTRACTION_RE = re.compile(
    r"\b(" + "|".join(sorted(_CONTRACTIONS, key=len, reverse=True)) + r")\b",
    flags=re.IGNORECASE,
)


def expand_contractions(text):
    """Expand common English contractions (e.g. "don't" -> "do not").

    Matching is case-insensitive; expansions are inserted in lowercase,
    mirroring the original sequential ``re.sub`` behaviour.

    Args:
        text: Input string, possibly containing contractions.

    Returns:
        The text with every known contraction expanded.
    """
    return _CONTRACTION_RE.sub(
        lambda m: _CONTRACTIONS[m.group(0).lower()], text
    )
49
 
50
# Preprocessing Function
def preprocess(text):
    """Normalise raw text for the sentiment model.

    Steps: lowercase, expand contractions, strip digits and punctuation,
    then drop English stopwords while keeping negations and be-verbs,
    which carry sentiment signal.

    Args:
        text: Raw user input string.

    Returns:
        The cleaned, space-joined string (may be empty).
    """
    # Words that must survive stopword removal.
    negations = {"not", "no", "nor", "never", "n't", "nobody", "nothing",
                 "neither", "nowhere", "none"}
    important_words = {"am", "is", "are", "was", "were", "be", "been", "being"}

    try:
        from nltk.corpus import stopwords
        stop_words = set(stopwords.words("english")) - negations - important_words
    except Exception:
        # Narrowed from a bare `except:`; falls back to no stopword removal
        # when NLTK or its corpus is unavailable.
        stop_words = set()

    text = text.lower()
    text = expand_contractions(text)
    text = re.sub(r"\d+", "", text)  # remove digits
    text = text.translate(str.maketrans('', '', string.punctuation))

    # stop_words already has negations/important_words subtracted, so the
    # original `or w in negations or w in important_words` clauses were
    # redundant — a plain membership test is equivalent.
    words = [w for w in text.split() if w not in stop_words]

    return " ".join(words)
80
 
81
# Load Trained Model and Tokenizer
def load_resources():
    """Load the trained Keras model and the fitted tokenizer from disk.

    Returns:
        Tuple of ``(model, tokenizer)``.

    Raises:
        FileNotFoundError: if either artifact file is missing.
        Exception: re-raised unchanged for any other loading failure.
    """
    try:
        # Load model
        model = load_model(MODEL_PATH)
        print(f"Model loaded successfully from {MODEL_PATH}")

        # Load tokenizer
        with open(TOKENIZER_PATH, "rb") as f:
            tokenizer = pickle.load(f)
        print(f"Tokenizer loaded successfully from {TOKENIZER_PATH}")

        return model, tokenizer

    except FileNotFoundError:
        # Original used an f-string with no placeholders here.
        print("Error: Model or Tokenizer file not found!")
        print(f" Make sure {MODEL_PATH} AND {TOKENIZER_PATH} are in the same directory.")
        raise  # bare raise preserves the original traceback
    except Exception as e:
        print(f"Error loading resources: {e}")
        raise


# Load model and tokenizer once at import time so predict_sentiment can use them.
model, tokenizer = load_resources()
105
+
106
# Prediction Function
def predict_sentiment(text):
    """Classify *text* and return (sentiment, confidence, markdown details).

    Returns a 3-tuple of strings consumed by the Gradio outputs:
    the predicted label, the confidence percentage, and a detailed
    markdown report of all class probabilities.
    """
    # Guard: nothing typed at all.
    if not text or not text.strip():
        return "⚠️ Neutral", "33.33%", "Please enter some text to analyze!"

    cleaned = preprocess(text)

    # Guard: preprocessing may strip everything (e.g. only digits/punctuation).
    if not cleaned.strip():
        return "⚠️ Neutral", "33.33%", "Text is empty after preprocessing. Try adding more words."

    # Encode and pad to the fixed model input length, then predict.
    encoded = pad_sequences(tokenizer.texts_to_sequences([cleaned]),
                            maxlen=MAX_LEN, padding='post')
    probs = model.predict(encoded, verbose=0)

    label_idx = np.argmax(probs, axis=1)[0]
    confidence = probs[0][label_idx]

    # NOTE(review): class order assumed Negative=0, Positive=1, Neutral=2 —
    # this matches the UI footer text; confirm against the training labels.
    labels = ["😞 Negative", "😊 Positive", "😐 Neutral"]
    sentiment = labels[label_idx]
    confidence_percentage = f"{confidence * 100:.2f}%"

    detailed_results = f"""
### 📊 Detailed Analysis:

**Original Text:** {text}

**Processed Text:** {cleaned}

**Prediction Probabilities:**
- 😞 Negative: {probs[0][0] * 100:.2f}%
- 😊 Positive: {probs[0][1] * 100:.2f}%
- 😐 Neutral: {probs[0][2] * 100:.2f}%

**Final Sentiment:** {sentiment}
**Confidence:** {confidence_percentage}
"""

    return sentiment, confidence_percentage, detailed_results
151
+
152
+
153
# GRADIO INTERFACE
def create_gradio_interface():
    """Build and return the Gradio Blocks app for sentiment analysis."""

    # Canned inputs users can click to try the model quickly.
    examples = [
        ["I'm so happy with my purchase! Highly recommended!"],
        ["I don't like this at all. Very disappointing."],
        ["I absolutely love this product! It's amazing!"],
        ["This is the worst experience I've ever had."],
        ["Fantastic! Best decision I ever made!"],
        ["I'm not sure how I feel about this."],
        ["It's okay, nothing special really."],
        ["Amazing work! Best I've ever seen!"],
        ["This is the worst experience ever"],
        ["This is terrible and I hate it"],
        ["It works fine, no complaints."],
        ["Not bad, but could be better."],
        ["He is no good boy"],
        ["I'm doing great"],
        ["I'm not normal"],
        ["Both of you"],
        ["I am fine"],
        ["I am good"],
        ["I'm okay"]
    ]

    with gr.Blocks(theme=gr.themes.Soft(), title="Sentiment Analysis") as interface:

        # Page header and usage instructions.
        gr.Markdown("""
# 🎭 Sentiment Analysis - AI Powered
### Analyze the sentiment of your text using Deep Learning (LSTM Model)

**Instructions:** Enter any text in English and the model will predict whether it's Positive, Negative, or Neutral.
""")

        # Two-column layout: input on the left, results on the right.
        with gr.Row():
            with gr.Column(scale=1):
                user_text = gr.Textbox(
                    label="📝 Enter Your Text",
                    placeholder="Type your text here... (e.g., 'I love this product!')",
                    lines=5,
                    max_lines=10
                )

                with gr.Row():
                    run_btn = gr.Button("🔍 Analyze Sentiment", variant="primary", size="lg")
                    reset_btn = gr.ClearButton([user_text], value="🗑️ Clear", size="lg")

            with gr.Column(scale=1):
                sentiment_box = gr.Textbox(
                    label="🎯 Predicted Sentiment",
                    interactive=False
                )
                confidence_box = gr.Textbox(
                    label="📈 Confidence Score",
                    interactive=False
                )

        # Full markdown breakdown of the prediction.
        details_md = gr.Markdown(
            label="📊 Detailed Analysis",
            value="Results will appear here after analysis..."
        )

        # Clickable example inputs (not cached — run live on click).
        gr.Markdown("### 💡 Try These Examples:")
        gr.Examples(
            examples=examples,
            inputs=user_text,
            outputs=[sentiment_box, confidence_box, details_md],
            fn=predict_sentiment,
            cache_examples=False
        )

        # Static footer with model details and usage tips.
        gr.Markdown("""
---
**Model Information:**
- Architecture: Bidirectional LSTM with Embedding Layer
- Classes: Negative (0), Positive (1), Neutral (2)
- Max Sequence Length: 100 tokens

**Tips for Best Results:**
- Use clear, complete sentences
- The model works best with English text
- Longer texts provide more context for accurate predictions
""")

        # Run prediction on button click...
        run_btn.click(
            fn=predict_sentiment,
            inputs=user_text,
            outputs=[sentiment_box, confidence_box, details_md]
        )
        # ...and on Enter inside the textbox.
        user_text.submit(
            fn=predict_sentiment,
            inputs=user_text,
            outputs=[sentiment_box, confidence_box, details_md]
        )

    return interface
263
+
264
 
265
# MAIN EXECUTION
if __name__ == "__main__":
    print("\n" + "=" * 70)
    print("🚀 Starting Sentiment Analysis Gradio Interface...")
    print("=" * 70)

    # Create the interface
    interface = create_gradio_interface()

    # launch() blocks until the server is stopped, so print the access
    # information BEFORE launching. (The original printed this banner after
    # launch(), which meant it only ever appeared at shutdown.)
    print("\n" + "=" * 70)
    print("✓ Interface is running!")
    print(" Local URL: http://localhost:7860")
    print(" Press Ctrl+C to stop the server")
    print("=" * 70)

    # Launch with configuration
    interface.launch(
        server_name="0.0.0.0",  # listen on all interfaces (external access)
        server_port=7860,       # default Gradio port
        share=False,            # set True for a public gradio.live URL
        inbrowser=True,         # auto-open in a local browser tab
        show_error=True         # surface exceptions in the interface
    )