Afathman committed on
Commit
6655e12
·
verified ·
1 Parent(s): 2fca55c

Upload 9 files

Browse files
Files changed (1) hide show
  1. app.py +165 -95
app.py CHANGED
@@ -13,11 +13,13 @@ try:
13
  day_encoder = joblib.load('day_encoder.pkl')
14
  feature_names = joblib.load('feature_names.pkl')
15
  model_results = joblib.load('model_results.pkl')
 
16
  except Exception as e:
17
- print(f"Error loading models: {e}")
18
 
19
  # Load sentiment analysis pipeline
20
  sentiment = pipeline("sentiment-analysis")
 
21
  # Expanded content classification labels
22
  classification_labels = [
23
  "engaging", "promotional", "informative", "urgent", "personal", "spammy",
@@ -26,8 +28,19 @@ classification_labels = [
26
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
27
 
28
  # Load chatbot model (google/flan-t5-large)
29
- chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
30
- chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  def extract_text_features(text):
33
  if pd.isna(text) or text == '':
@@ -86,76 +99,98 @@ def section_suggestion(section, features):
86
  return ""
87
 
88
  def predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric):
89
- # Extract text features
90
- subject_features = extract_text_features(subject)
91
- preview_features = extract_text_features(preview_text)
92
- body_features = extract_text_features(body_text)
93
- # Parse send time
94
- try:
95
- send_hour = datetime.strptime(send_time, '%I:%M %p').hour
96
- except:
97
- send_hour = 9 # Default to 9 AM
98
- # Encode categorical variables
99
  try:
100
- day_encoded = day_encoder.transform([day_of_week])[0]
101
- except:
102
- day_encoded = 0 # Default encoding
103
- # Create feature vector (no list or audience size)
104
- features = [
105
- 500000, # Placeholder for audience size (kept for model compatibility)
106
- send_hour,
107
- day_encoded,
108
- 0 # Placeholder for list (kept for model compatibility)
109
- ]
110
- # Add text features in correct order
111
- for feats in [subject_features, preview_features]:
112
- for suffix in ['length', 'word_count', 'exclamation_count', 'question_count', 'emoji_count', 'number_count', 'caps_ratio']:
113
- features.append(feats[suffix])
114
- # For body, just append features (if you want to use them in the model, retrain with these features)
115
- for suffix in ['length', 'word_count', 'exclamation_count', 'question_count', 'emoji_count', 'number_count', 'caps_ratio']:
116
- features.append(body_features[suffix])
117
- # Scale features (truncate or pad to match model input)
118
- features = features[:len(feature_names)]
119
- features_scaled = scaler.transform([features])
120
- # Make prediction
121
- model = models[target_metric]
122
- prediction = model.predict(features_scaled)[0]
123
- # Convert to percentage and ensure reasonable bounds
124
- if target_metric == 'open_rate':
125
- prediction = max(0, min(1, prediction)) * 100
126
- elif target_metric == 'click_rate':
127
- prediction = max(0, min(0.5, prediction)) * 100
128
- else: # unsubscribe_rate
129
- prediction = max(0, min(0.1, prediction)) * 100
130
- return prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_time, target_metric):
133
  # Section features and scores
134
  subject_features = extract_text_features(subject)
135
  preview_features = extract_text_features(preview_text)
136
  body_features = extract_text_features(body_text)
 
137
  subject_score = section_score(subject_features)
138
  preview_score = section_score(preview_features)
139
  body_score = section_score(body_features)
 
140
  # Section suggestions
141
  subject_sugg = section_suggestion("subject", subject_features)
142
  preview_sugg = section_suggestion("preview", preview_features)
143
  body_sugg = section_suggestion("body", body_features)
 
144
  # Overall performance score (weighted avg)
145
  performance_score = int(round(0.4 * subject_score + 0.3 * preview_score + 0.3 * body_score))
 
146
  # Predicted metric
147
  predicted_value = predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric)
 
148
  # Sentiment analysis
149
  text_for_sentiment = f"{subject}\n{preview_text}\n{body_text}"
150
  sentiment_result = sentiment(text_for_sentiment)[0]
 
151
  # Zero-shot classification
152
  classification_result = classifier(text_for_sentiment, classification_labels)
 
153
  # Format output
154
  metric_label = {
155
  "open_rate": "Open Rate",
156
  "click_rate": "Click Rate",
157
  "unsubscribe_rate": "Unsubscribe Rate"
158
  }[target_metric]
 
159
  output = f"""
160
  ## 📊 Performance Score: {performance_score}/100
161
 
@@ -174,8 +209,10 @@ def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_t
174
 
175
  ### 🏷️ Content Classification
176
  """
 
177
  for i, (label, score) in enumerate(zip(classification_result['labels'][:6], classification_result['scores'][:6])):
178
  output += f"- **{label.title()}**: {score:.2f}\n"
 
179
  output += f"""
180
  ### 📋 Email Details
181
  - **Subject Length:** {subject_features['length']} characters
@@ -186,61 +223,86 @@ def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_t
186
  ---
187
  #### 💬 Ask the Email Optimization Chatbot below for advice!
188
  """
189
- # Save context for chatbot
190
- gr.set_state({
191
- "last_input": {
192
- "subject": subject,
193
- "preview_text": preview_text,
194
- "body_text": body_text,
195
- "day_of_week": day_of_week,
196
- "send_time": send_time,
197
- "target_metric": target_metric,
198
- "scores": {
199
- "performance_score": performance_score,
200
- "subject_score": subject_score,
201
- "preview_score": preview_score,
202
- "body_score": body_score,
203
- "predicted_value": predicted_value
204
- },
205
- "suggestions": {
206
- "subject": subject_sugg,
207
- "preview": preview_sugg,
208
- "body": body_sugg
209
- },
210
- "sentiment": sentiment_result,
211
- "classification": classification_result
212
- }
213
- })
214
- return output
215
 
216
- def chatbot_response(user_message, state):
217
- # Retrieve last analysis context
218
- context = state.get("last_input", {})
219
- if not context:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  return "Please analyze an email first, then ask your question here."
221
- # Compose prompt for Flan-T5
222
- prompt = f"""You are an expert email marketing assistant. Here is the analysis of an email campaign:
223
- Subject: {context['subject']}
224
- Preview: {context['preview_text']}
225
- Body: {context['body_text']}
226
- Day: {context['day_of_week']}
227
- Send Time: {context['send_time']}
228
- Target Metric: {context['target_metric']}
229
- Scores: {context['scores']}
230
- Suggestions: {context['suggestions']}
231
- Sentiment: {context['sentiment']}
232
- Classification: {context['classification']['labels'][:6]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  User question: {user_message}
234
- Give a specific, actionable answer based on the above analysis."""
235
- # Generate response
236
- inputs = chatbot_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
237
- outputs = chatbot_model.generate(**inputs, max_new_tokens=256)
238
- answer = chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)
239
- return answer
 
 
 
 
 
 
 
 
 
 
 
240
 
241
  # Available options
242
  day_options = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
243
 
 
244
  with gr.Blocks() as demo:
245
  with gr.Row():
246
  with gr.Column():
@@ -254,16 +316,24 @@ with gr.Blocks() as demo:
254
  analyze_btn = gr.Button("Analyze Email")
255
  with gr.Column():
256
  analysis_output = gr.Markdown()
 
 
 
 
 
257
  chatbot = gr.ChatInterface(
258
  fn=chatbot_response,
259
- additional_inputs=[gr.State()],
260
  title="Email Optimization Chatbot",
261
  description="Ask for advice on how to improve your email based on the analysis above."
262
  )
 
 
263
  analyze_btn.click(
264
  analyze_email_complete,
265
  inputs=[subject, preview_text, body_text, day_of_week, send_time, target_metric],
266
- outputs=analysis_output
267
  )
268
 
269
- demo.launch()
 
 
13
  day_encoder = joblib.load('day_encoder.pkl')
14
  feature_names = joblib.load('feature_names.pkl')
15
  model_results = joblib.load('model_results.pkl')
16
+ print("✅ Models loaded successfully!")
17
  except Exception as e:
18
+ print(f"Error loading models: {e}")
19
 
20
  # Load sentiment analysis pipeline
21
  sentiment = pipeline("sentiment-analysis")
22
+
23
  # Expanded content classification labels
24
  classification_labels = [
25
  "engaging", "promotional", "informative", "urgent", "personal", "spammy",
 
28
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
29
 
30
  # Load chatbot model (google/flan-t5-large)
31
# Load the chatbot model: try google/flan-t5-large first and fall back to
# google/flan-t5-base if that fails.
#
# Both names are bound to None up front so that later code can always refer to
# them, even when neither checkpoint could be loaded. They are also reset
# before the fallback attempt so a tokenizer from one checkpoint is never
# paired with a model from the other.
chatbot_tokenizer = None
chatbot_model = None
try:
    chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
    chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
    print("✅ Chatbot model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading chatbot model: {e}")
    # Discard any half-loaded state from the large checkpoint.
    chatbot_tokenizer = None
    chatbot_model = None
    # Fallback to smaller model if large one fails
    try:
        chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
        print("✅ Fallback chatbot model loaded successfully!")
    except Exception as e2:
        print(f"❌ Error loading fallback model: {e2}")
        # Leave both names defined (as None) so the failure is explicit.
        chatbot_tokenizer = None
        chatbot_model = None
44
 
45
  def extract_text_features(text):
46
  if pd.isna(text) or text == '':
 
99
  return ""
100
 
101
  def predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric):
 
 
 
 
 
 
 
 
 
 
102
  try:
103
+ # Extract text features
104
+ subject_features = extract_text_features(subject)
105
+ preview_features = extract_text_features(preview_text)
106
+ body_features = extract_text_features(body_text)
107
+
108
+ # Parse send time
109
+ try:
110
+ send_hour = datetime.strptime(send_time, '%I:%M %p').hour
111
+ except:
112
+ send_hour = 9 # Default to 9 AM
113
+
114
+ # Encode categorical variables
115
+ try:
116
+ day_encoded = day_encoder.transform([day_of_week])[0]
117
+ except:
118
+ day_encoded = 0 # Default encoding
119
+
120
+ # Create feature vector (no list or audience size)
121
+ features = [
122
+ 500000, # Placeholder for audience size (kept for model compatibility)
123
+ send_hour,
124
+ day_encoded,
125
+ 0 # Placeholder for list (kept for model compatibility)
126
+ ]
127
+
128
+ # Add text features in correct order
129
+ for feats in [subject_features, preview_features]:
130
+ for suffix in ['length', 'word_count', 'exclamation_count', 'question_count', 'emoji_count', 'number_count', 'caps_ratio']:
131
+ features.append(feats[suffix])
132
+
133
+ # Scale features (truncate or pad to match model input)
134
+ if len(features) > len(feature_names):
135
+ features = features[:len(feature_names)]
136
+ elif len(features) < len(feature_names):
137
+ features.extend([0] * (len(feature_names) - len(features)))
138
+
139
+ features_scaled = scaler.transform([features])
140
+
141
+ # Make prediction
142
+ model = models[target_metric]
143
+ prediction = model.predict(features_scaled)[0]
144
+
145
+ # Convert to percentage and ensure reasonable bounds
146
+ if target_metric == 'open_rate':
147
+ prediction = max(0, min(1, prediction)) * 100
148
+ elif target_metric == 'click_rate':
149
+ prediction = max(0, min(0.5, prediction)) * 100
150
+ else: # unsubscribe_rate
151
+ prediction = max(0, min(0.1, prediction)) * 100
152
+
153
+ return prediction
154
+
155
+ except Exception as e:
156
+ print(f"Prediction error: {e}")
157
+ return 2.5 # Default prediction
158
 
159
  def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_time, target_metric):
160
  # Section features and scores
161
  subject_features = extract_text_features(subject)
162
  preview_features = extract_text_features(preview_text)
163
  body_features = extract_text_features(body_text)
164
+
165
  subject_score = section_score(subject_features)
166
  preview_score = section_score(preview_features)
167
  body_score = section_score(body_features)
168
+
169
  # Section suggestions
170
  subject_sugg = section_suggestion("subject", subject_features)
171
  preview_sugg = section_suggestion("preview", preview_features)
172
  body_sugg = section_suggestion("body", body_features)
173
+
174
  # Overall performance score (weighted avg)
175
  performance_score = int(round(0.4 * subject_score + 0.3 * preview_score + 0.3 * body_score))
176
+
177
  # Predicted metric
178
  predicted_value = predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric)
179
+
180
  # Sentiment analysis
181
  text_for_sentiment = f"{subject}\n{preview_text}\n{body_text}"
182
  sentiment_result = sentiment(text_for_sentiment)[0]
183
+
184
  # Zero-shot classification
185
  classification_result = classifier(text_for_sentiment, classification_labels)
186
+
187
  # Format output
188
  metric_label = {
189
  "open_rate": "Open Rate",
190
  "click_rate": "Click Rate",
191
  "unsubscribe_rate": "Unsubscribe Rate"
192
  }[target_metric]
193
+
194
  output = f"""
195
  ## 📊 Performance Score: {performance_score}/100
196
 
 
209
 
210
  ### 🏷️ Content Classification
211
  """
212
+
213
  for i, (label, score) in enumerate(zip(classification_result['labels'][:6], classification_result['scores'][:6])):
214
  output += f"- **{label.title()}**: {score:.2f}\n"
215
+
216
  output += f"""
217
  ### 📋 Email Details
218
  - **Subject Length:** {subject_features['length']} characters
 
223
  ---
224
  #### 💬 Ask the Email Optimization Chatbot below for advice!
225
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
+ # Create context for chatbot
228
+ context = {
229
+ "subject": subject,
230
+ "preview_text": preview_text,
231
+ "body_text": body_text,
232
+ "day_of_week": day_of_week,
233
+ "send_time": send_time,
234
+ "target_metric": target_metric,
235
+ "scores": {
236
+ "performance_score": performance_score,
237
+ "subject_score": subject_score,
238
+ "preview_score": preview_score,
239
+ "body_score": body_score,
240
+ "predicted_value": predicted_value
241
+ },
242
+ "suggestions": {
243
+ "subject": subject_sugg,
244
+ "preview": preview_sugg,
245
+ "body": body_sugg
246
+ },
247
+ "sentiment": sentiment_result,
248
+ "classification": classification_result
249
+ }
250
+
251
+ return output, context
252
+
253
def chatbot_response(user_message, history=None, context=None):
    """Answer a user question about the most recently analyzed email.

    ``gr.ChatInterface`` calls its ``fn`` as
    ``fn(message, history, *additional_inputs)``, so the analysis context
    stored in the ``gr.State`` arrives as the third argument. The older
    two-argument form ``chatbot_response(user_message, context)`` is still
    supported: a dict passed in the ``history`` position is treated as the
    context.

    Args:
        user_message: The question typed by the user.
        history: Chat history supplied by Gradio (unused), or the context
            dict when called in the legacy two-argument form.
        context: Dict produced by the email analysis step.

    Returns:
        The generated answer, or a fixed fallback message when there is no
        analysis context or generation fails.
    """
    # Legacy call shape: (user_message, context_dict).
    if context is None and isinstance(history, dict):
        context = history

    # Check if context exists
    if not context or not isinstance(context, dict):
        return "Please analyze an email first, then ask your question here."

    try:
        scores = context.get('scores', {})
        suggestions = context.get('suggestions', {})
        sentiment_label = context.get('sentiment', {}).get('label', 'N/A')
        top_labels = ', '.join(context.get('classification', {}).get('labels', [])[:3])

        # Compose prompt for Flan-T5
        prompt = f"""You are an expert email marketing assistant. Here is the analysis of an email campaign:

Subject: {context.get('subject', 'N/A')}
Preview: {context.get('preview_text', 'N/A')}
Body: {context.get('body_text', 'N/A')}
Day: {context.get('day_of_week', 'N/A')}
Send Time: {context.get('send_time', 'N/A')}
Target Metric: {context.get('target_metric', 'N/A')}

Performance Score: {scores.get('performance_score', 'N/A')}/100
Subject Score: {scores.get('subject_score', 'N/A')}/100
Preview Score: {scores.get('preview_score', 'N/A')}/100
Body Score: {scores.get('body_score', 'N/A')}/100
Predicted Value: {scores.get('predicted_value', 'N/A')}%

Current Suggestions:
- Subject: {suggestions.get('subject', 'N/A')}
- Preview: {suggestions.get('preview', 'N/A')}
- Body: {suggestions.get('body', 'N/A')}

Sentiment: {sentiment_label}
Top Classifications: {top_labels}

User question: {user_message}

Give a specific, actionable answer based on the above analysis. Be concise and practical."""

        # Generate response
        inputs = chatbot_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
        outputs = chatbot_model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
        answer = chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Remove the original prompt from the answer if it's included
        if prompt in answer:
            answer = answer.replace(prompt, "").strip()

        return answer if answer else "I'm sorry, I couldn't generate a response. Please try rephrasing your question."

    except Exception as e:
        # UI boundary: log the failure and show a friendly message instead.
        print(f"Chatbot error: {e}")
        return "I'm having trouble generating a response right now. Please try again."
301
 
302
  # Available options
303
  day_options = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
304
 
305
+ # Create Gradio interface
306
  with gr.Blocks() as demo:
307
  with gr.Row():
308
  with gr.Column():
 
316
  analyze_btn = gr.Button("Analyze Email")
317
  with gr.Column():
318
  analysis_output = gr.Markdown()
319
+
320
+ # State to store context
321
+ state = gr.State()
322
+
323
+ # Chatbot interface
324
  chatbot = gr.ChatInterface(
325
  fn=chatbot_response,
326
+ additional_inputs=[state],
327
  title="Email Optimization Chatbot",
328
  description="Ask for advice on how to improve your email based on the analysis above."
329
  )
330
+
331
+ # Connect the analyze button
332
  analyze_btn.click(
333
  analyze_email_complete,
334
  inputs=[subject, preview_text, body_text, day_of_week, send_time, target_metric],
335
+ outputs=[analysis_output, state]
336
  )
337
 
338
+ if __name__ == "__main__":
339
+ demo.launch()