Spaces:

Afathman
/

email-performance-predictor

Sleeping

App Files Files Community

Afathman committed on Aug 3, 2025

Commit

6655e12

verified ·

1 Parent(s): 2fca55c

Upload 9 files

Browse files

Files changed (1) hide show

app.py +165 -95

app.py CHANGED Viewed

@@ -13,11 +13,13 @@ try:
     day_encoder = joblib.load('day_encoder.pkl')
     feature_names = joblib.load('feature_names.pkl')
     model_results = joblib.load('model_results.pkl')
 except Exception as e:
-    print(f"Error loading models: {e}")
 # Load sentiment analysis pipeline
 sentiment = pipeline("sentiment-analysis")
 # Expanded content classification labels
 classification_labels = [
     "engaging", "promotional", "informative", "urgent", "personal", "spammy",
@@ -26,8 +28,19 @@ classification_labels = [
 classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 # Load chatbot model (google/flan-t5-large)
-chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
-chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
 def extract_text_features(text):
     if pd.isna(text) or text == '':
@@ -86,76 +99,98 @@ def section_suggestion(section, features):
     return ""
 def predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric):
-    # Extract text features
-    subject_features = extract_text_features(subject)
-    preview_features = extract_text_features(preview_text)
-    body_features = extract_text_features(body_text)
-    # Parse send time
-    try:
-        send_hour = datetime.strptime(send_time, '%I:%M %p').hour
-    except:
-        send_hour = 9  # Default to 9 AM
-    # Encode categorical variables
     try:
-        day_encoded = day_encoder.transform([day_of_week])[0]
-    except:
-        day_encoded = 0  # Default encoding
-    # Create feature vector (no list or audience size)
-    features = [
-        500000,  # Placeholder for audience size (kept for model compatibility)
-        send_hour,
-        day_encoded,
-        0  # Placeholder for list (kept for model compatibility)
-    ]
-    # Add text features in correct order
-    for feats in [subject_features, preview_features]:
-        for suffix in ['length', 'word_count', 'exclamation_count', 'question_count', 'emoji_count', 'number_count', 'caps_ratio']:
-            features.append(feats[suffix])
-    # For body, just append features (if you want to use them in the model, retrain with these features)
-    for suffix in ['length', 'word_count', 'exclamation_count', 'question_count', 'emoji_count', 'number_count', 'caps_ratio']:
-        features.append(body_features[suffix])
-    # Scale features (truncate or pad to match model input)
-    features = features[:len(feature_names)]
-    features_scaled = scaler.transform([features])
-    # Make prediction
-    model = models[target_metric]
-    prediction = model.predict(features_scaled)[0]
-    # Convert to percentage and ensure reasonable bounds
-    if target_metric == 'open_rate':
-        prediction = max(0, min(1, prediction)) * 100
-    elif target_metric == 'click_rate':
-        prediction = max(0, min(0.5, prediction)) * 100
-    else:  # unsubscribe_rate
-        prediction = max(0, min(0.1, prediction)) * 100
-    return prediction
 def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_time, target_metric):
     # Section features and scores
     subject_features = extract_text_features(subject)
     preview_features = extract_text_features(preview_text)
     body_features = extract_text_features(body_text)
     subject_score = section_score(subject_features)
     preview_score = section_score(preview_features)
     body_score = section_score(body_features)
     # Section suggestions
     subject_sugg = section_suggestion("subject", subject_features)
     preview_sugg = section_suggestion("preview", preview_features)
     body_sugg = section_suggestion("body", body_features)
     # Overall performance score (weighted avg)
     performance_score = int(round(0.4 * subject_score + 0.3 * preview_score + 0.3 * body_score))
     # Predicted metric
     predicted_value = predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric)
     # Sentiment analysis
     text_for_sentiment = f"{subject}\n{preview_text}\n{body_text}"
     sentiment_result = sentiment(text_for_sentiment)[0]
     # Zero-shot classification
     classification_result = classifier(text_for_sentiment, classification_labels)
     # Format output
     metric_label = {
         "open_rate": "Open Rate",
         "click_rate": "Click Rate",
         "unsubscribe_rate": "Unsubscribe Rate"
     }[target_metric]
     output = f"""
 ## 📊 Performance Score: {performance_score}/100
@@ -174,8 +209,10 @@ def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_t
 ### 🏷️ Content Classification
 """
     for i, (label, score) in enumerate(zip(classification_result['labels'][:6], classification_result['scores'][:6])):
         output += f"- **{label.title()}**: {score:.2f}\n"
     output += f"""
 ### 📋 Email Details
 - **Subject Length:** {subject_features['length']} characters
@@ -186,61 +223,86 @@ def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_t
 ---
 #### 💬 Ask the Email Optimization Chatbot below for advice!
 """
-    # Save context for chatbot
-    gr.set_state({
-        "last_input": {
-            "subject": subject,
-            "preview_text": preview_text,
-            "body_text": body_text,
-            "day_of_week": day_of_week,
-            "send_time": send_time,
-            "target_metric": target_metric,
-            "scores": {
-                "performance_score": performance_score,
-                "subject_score": subject_score,
-                "preview_score": preview_score,
-                "body_score": body_score,
-                "predicted_value": predicted_value
-            },
-            "suggestions": {
-                "subject": subject_sugg,
-                "preview": preview_sugg,
-                "body": body_sugg
-            },
-            "sentiment": sentiment_result,
-            "classification": classification_result
-        }
-    })
-    return output
-def chatbot_response(user_message, state):
-    # Retrieve last analysis context
-    context = state.get("last_input", {})
-    if not context:
         return "Please analyze an email first, then ask your question here."
-    # Compose prompt for Flan-T5
-    prompt = f"""You are an expert email marketing assistant. Here is the analysis of an email campaign:
-Subject: {context['subject']}
-Preview: {context['preview_text']}
-Body: {context['body_text']}
-Day: {context['day_of_week']}
-Send Time: {context['send_time']}
-Target Metric: {context['target_metric']}
-Scores: {context['scores']}
-Suggestions: {context['suggestions']}
-Sentiment: {context['sentiment']}
-Classification: {context['classification']['labels'][:6]}
 User question: {user_message}
-Give a specific, actionable answer based on the above analysis."""
-    # Generate response
-    inputs = chatbot_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
-    outputs = chatbot_model.generate(**inputs, max_new_tokens=256)
-    answer = chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return answer
 # Available options
 day_options = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
@@ -254,16 +316,24 @@ with gr.Blocks() as demo:
             analyze_btn = gr.Button("Analyze Email")
         with gr.Column():
             analysis_output = gr.Markdown()
     chatbot = gr.ChatInterface(
         fn=chatbot_response,
-        additional_inputs=[gr.State()],
         title="Email Optimization Chatbot",
         description="Ask for advice on how to improve your email based on the analysis above."
     )
     analyze_btn.click(
         analyze_email_complete,
         inputs=[subject, preview_text, body_text, day_of_week, send_time, target_metric],
-        outputs=analysis_output
     )
-demo.launch()

     day_encoder = joblib.load('day_encoder.pkl')
     feature_names = joblib.load('feature_names.pkl')
     model_results = joblib.load('model_results.pkl')
+    print("✅ Models loaded successfully!")
 except Exception as e:
+    print(f"❌ Error loading models: {e}")
 # Load sentiment analysis pipeline
 sentiment = pipeline("sentiment-analysis")
 # Expanded content classification labels
 classification_labels = [
     "engaging", "promotional", "informative", "urgent", "personal", "spammy",
 classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 # Load chatbot model (google/flan-t5-large)
+try:
+    chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
+    chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
+    print("✅ Chatbot model loaded successfully!")
+except Exception as e:
+    print(f"❌ Error loading chatbot model: {e}")
+    # Fallback to smaller model if large one fails
+    try:
+        chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
+        chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
+        print("✅ Fallback chatbot model loaded successfully!")
+    except Exception as e2:
+        print(f"❌ Error loading fallback model: {e2}")
 def extract_text_features(text):
     if pd.isna(text) or text == '':
     return ""
 def predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric):
     try:
+        # Extract text features
+        subject_features = extract_text_features(subject)
+        preview_features = extract_text_features(preview_text)
+        body_features = extract_text_features(body_text)
+        # Parse send time
+        try:
+            send_hour = datetime.strptime(send_time, '%I:%M %p').hour
+        except:
+            send_hour = 9  # Default to 9 AM
+        # Encode categorical variables
+        try:
+            day_encoded = day_encoder.transform([day_of_week])[0]
+        except:
+            day_encoded = 0  # Default encoding
+        # Create feature vector (no list or audience size)
+        features = [
+            500000,  # Placeholder for audience size (kept for model compatibility)
+            send_hour,
+            day_encoded,
+            0  # Placeholder for list (kept for model compatibility)
+        ]
+        # Add text features in correct order
+        for feats in [subject_features, preview_features]:
+            for suffix in ['length', 'word_count', 'exclamation_count', 'question_count', 'emoji_count', 'number_count', 'caps_ratio']:
+                features.append(feats[suffix])
+        # Scale features (truncate or pad to match model input)
+        if len(features) > len(feature_names):
+            features = features[:len(feature_names)]
+        elif len(features) < len(feature_names):
+            features.extend([0] * (len(feature_names) - len(features)))
+        features_scaled = scaler.transform([features])
+        # Make prediction
+        model = models[target_metric]
+        prediction = model.predict(features_scaled)[0]
+        # Convert to percentage and ensure reasonable bounds
+        if target_metric == 'open_rate':
+            prediction = max(0, min(1, prediction)) * 100
+        elif target_metric == 'click_rate':
+            prediction = max(0, min(0.5, prediction)) * 100
+        else:  # unsubscribe_rate
+            prediction = max(0, min(0.1, prediction)) * 100
+        return prediction
+    except Exception as e:
+        print(f"Prediction error: {e}")
+        return 2.5  # Default prediction
 def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_time, target_metric):
     # Section features and scores
     subject_features = extract_text_features(subject)
     preview_features = extract_text_features(preview_text)
     body_features = extract_text_features(body_text)
     subject_score = section_score(subject_features)
     preview_score = section_score(preview_features)
     body_score = section_score(body_features)
     # Section suggestions
     subject_sugg = section_suggestion("subject", subject_features)
     preview_sugg = section_suggestion("preview", preview_features)
     body_sugg = section_suggestion("body", body_features)
     # Overall performance score (weighted avg)
     performance_score = int(round(0.4 * subject_score + 0.3 * preview_score + 0.3 * body_score))
     # Predicted metric
     predicted_value = predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric)
     # Sentiment analysis
     text_for_sentiment = f"{subject}\n{preview_text}\n{body_text}"
     sentiment_result = sentiment(text_for_sentiment)[0]
     # Zero-shot classification
     classification_result = classifier(text_for_sentiment, classification_labels)
     # Format output
     metric_label = {
         "open_rate": "Open Rate",
         "click_rate": "Click Rate",
         "unsubscribe_rate": "Unsubscribe Rate"
     }[target_metric]
     output = f"""
 ## 📊 Performance Score: {performance_score}/100
 ### 🏷️ Content Classification
 """
     for i, (label, score) in enumerate(zip(classification_result['labels'][:6], classification_result['scores'][:6])):
         output += f"- **{label.title()}**: {score:.2f}\n"
     output += f"""
 ### 📋 Email Details
 - **Subject Length:** {subject_features['length']} characters
 ---
 #### 💬 Ask the Email Optimization Chatbot below for advice!
 """
+    # Create context for chatbot
+    context = {
+        "subject": subject,
+        "preview_text": preview_text,
+        "body_text": body_text,
+        "day_of_week": day_of_week,
+        "send_time": send_time,
+        "target_metric": target_metric,
+        "scores": {
+            "performance_score": performance_score,
+            "subject_score": subject_score,
+            "preview_score": preview_score,
+            "body_score": body_score,
+            "predicted_value": predicted_value
+        },
+        "suggestions": {
+            "subject": subject_sugg,
+            "preview": preview_sugg,
+            "body": body_sugg
+        },
+        "sentiment": sentiment_result,
+        "classification": classification_result
+    }
+    return output, context
+def chatbot_response(user_message, context):
+    # Check if context exists
+    if not context or not isinstance(context, dict):
         return "Please analyze an email first, then ask your question here."
+    try:
+        # Compose prompt for Flan-T5
+        prompt = f"""You are an expert email marketing assistant. Here is the analysis of an email campaign:
+Subject: {context.get('subject', 'N/A')}
+Preview: {context.get('preview_text', 'N/A')}
+Body: {context.get('body_text', 'N/A')}
+Day: {context.get('day_of_week', 'N/A')}
+Send Time: {context.get('send_time', 'N/A')}
+Target Metric: {context.get('target_metric', 'N/A')}
+Performance Score: {context.get('scores', {}).get('performance_score', 'N/A')}/100
+Subject Score: {context.get('scores', {}).get('subject_score', 'N/A')}/100
+Preview Score: {context.get('scores', {}).get('preview_score', 'N/A')}/100
+Body Score: {context.get('scores', {}).get('body_score', 'N/A')}/100
+Predicted Value: {context.get('scores', {}).get('predicted_value', 'N/A')}%
+Current Suggestions:
+- Subject: {context.get('suggestions', {}).get('subject', 'N/A')}
+- Preview: {context.get('suggestions', {}).get('preview', 'N/A')}
+- Body: {context.get('suggestions', {}).get('body', 'N/A')}
+Sentiment: {context.get('sentiment', {}).get('label', 'N/A')}
+Top Classifications: {', '.join(context.get('classification', {}).get('labels', [])[:3])}
 User question: {user_message}
+Give a specific, actionable answer based on the above analysis. Be concise and practical."""
+        # Generate response
+        inputs = chatbot_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
+        outputs = chatbot_model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
+        answer = chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Remove the original prompt from the answer if it's included
+        if prompt in answer:
+            answer = answer.replace(prompt, "").strip()
+        return answer if answer else "I'm sorry, I couldn't generate a response. Please try rephrasing your question."
+    except Exception as e:
+        print(f"Chatbot error: {e}")
+        return "I'm having trouble generating a response right now. Please try again."
 # Available options
 day_options = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
+# Create Gradio interface
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             analyze_btn = gr.Button("Analyze Email")
         with gr.Column():
             analysis_output = gr.Markdown()
+    # State to store context
+    state = gr.State()
+    # Chatbot interface
     chatbot = gr.ChatInterface(
         fn=chatbot_response,
+        additional_inputs=[state],
         title="Email Optimization Chatbot",
         description="Ask for advice on how to improve your email based on the analysis above."
     )
+    # Connect the analyze button
     analyze_btn.click(
         analyze_email_complete,
         inputs=[subject, preview_text, body_text, day_of_week, send_time, target_metric],
+        outputs=[analysis_output, state]
     )
+if __name__ == "__main__":
+    demo.launch()