# Hugging Face Space: Afathman / email-performance-predictor
# (page header captured with this file: status "Sleeping", file size 13,534 bytes)

import gradio as gr
import joblib
import pandas as pd
import numpy as np
import re
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from datetime import datetime

# Load the trained models and preprocessors (if available).
# Every artifact name is pre-bound to None so that a failed or partial load
# still leaves all five names defined; predict_email_performance() then falls
# back to its default prediction instead of tripping over an unbound name.
# NOTE: joblib.load unpickles arbitrary Python objects -- only load the
# artifact files that ship with this app, never user-supplied files.
models = None
scaler = None
day_encoder = None
feature_names = None
model_results = None
try:
    models = joblib.load('email_quality_models.pkl')
    scaler = joblib.load('feature_scaler.pkl')
    day_encoder = joblib.load('day_encoder.pkl')
    feature_names = joblib.load('feature_names.pkl')
    model_results = joblib.load('model_results.pkl')
    print("✅ Models loaded successfully!")
except Exception as e:
    # Best-effort: the app keeps running without the trained models.
    print(f"❌ Error loading models: {e}")

# Load sentiment analysis pipeline.
# No checkpoint is named here, so transformers picks its default model for the
# "sentiment-analysis" task.
# NOTE(review): consider pinning a model so results stay reproducible across
# library versions.
sentiment = pipeline("sentiment-analysis")

# Expanded content classification labels: the candidate labels handed to the
# zero-shot classifier in analyze_email_complete().
classification_labels = [
    "engaging", "promotional", "informative", "urgent", "personal", "spammy",
    "announcement", "educational", "sales", "boring", "friendly", "exclusive"
]
# Zero-shot classifier using the facebook/bart-large-mnli checkpoint.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Load the chatbot model, preferring google/flan-t5-large and falling back to
# the smaller google/flan-t5-base if the large checkpoint cannot be loaded.
# Both names are pre-bound to None so they are always defined, and they are
# only assigned together, so the tokenizer and model never come from
# different checkpoints.
chatbot_tokenizer = None
chatbot_model = None
for _checkpoint, _ok_message, _error_label in (
    ("google/flan-t5-large", "✅ Chatbot model loaded successfully!", "chatbot model"),
    ("google/flan-t5-base", "✅ Fallback chatbot model loaded successfully!", "fallback model"),
):
    try:
        _tokenizer = AutoTokenizer.from_pretrained(_checkpoint)
        _model = AutoModelForSeq2SeqLM.from_pretrained(_checkpoint)
    except Exception as e:
        # Best-effort: report the failure and try the next candidate, if any.
        print(f"❌ Error loading {_error_label}: {e}")
        continue
    chatbot_tokenizer, chatbot_model = _tokenizer, _model
    print(_ok_message)
    break

def extract_text_features(text):
    """Compute simple surface statistics for one piece of email text.

    Args:
        text: The text to measure. Missing values (as judged by ``pd.isna``)
            and the empty string yield an all-zero feature dict.

    Returns:
        A dict with the keys ``length``, ``word_count``, ``exclamation_count``,
        ``question_count``, ``emoji_count``, ``number_count`` and
        ``caps_ratio`` (uppercase characters divided by total characters).
    """
    feature_keys = (
        'length', 'word_count', 'exclamation_count', 'question_count',
        'emoji_count', 'number_count', 'caps_ratio',
    )
    if pd.isna(text) or text == '':
        return dict.fromkeys(feature_keys, 0)

    char_total = len(text)

    stats = {}
    stats['length'] = char_total
    stats['word_count'] = len(text.split())
    stats['exclamation_count'] = text.count('!')
    stats['question_count'] = text.count('?')
    # NOTE(review): this pattern matches every character that is not a word
    # character, whitespace, comma or period -- so punctuation such as "!" or
    # "%" is counted as an "emoji" too. Kept as-is; presumably the persisted
    # models were fitted on this definition -- confirm before changing.
    stats['emoji_count'] = len(re.findall(r'[^\w\s,.]', text))
    stats['number_count'] = len(re.findall(r'\d+', text))
    if char_total > 0:
        uppercase_total = sum(1 for ch in text if ch.isupper())
        stats['caps_ratio'] = uppercase_total / char_total
    else:
        stats['caps_ratio'] = 0
    return stats

def section_score(features):
    """Return a placeholder quality score (0-100) for one email section.

    Starts from a base of 50 and adds capped bonuses for the emoji,
    exclamation-mark and question-mark counts, plus a bonus when the section
    length is between 20 and 60 characters inclusive.

    Args:
        features: Feature dict providing ``emoji_count``,
            ``exclamation_count``, ``question_count`` and ``length``.

    Returns:
        The score, clamped to the range 0..100.
    """
    emoji_bonus = min(20, features['emoji_count'] * 10)
    exclamation_bonus = min(10, features['exclamation_count'] * 5)
    question_bonus = min(10, features['question_count'] * 5)
    length_bonus = 10 if 20 <= features['length'] <= 60 else 0

    total = 50 + emoji_bonus + exclamation_bonus + question_bonus + length_bonus
    return max(0, min(100, total))

def section_suggestion(section, features):
    """Return one short improvement tip for the given email section.

    The rules for a section are checked in order and the first one that
    triggers decides the tip; if none triggers, the section's positive
    message is returned.

    Args:
        section: ``"subject"``, ``"preview"`` or ``"body"``.
        features: Feature dict for that section's text.

    Returns:
        The suggestion text, or an empty string for an unknown section.
    """
    if section == "subject":
        rules = (
            (lambda f: f['length'] > 50,
             "Try shortening your subject line for better impact."),
            (lambda f: f['emoji_count'] == 0,
             "Add an emoji to make your subject line stand out."),
            (lambda f: f['exclamation_count'] == 0,
             "Consider adding an exclamation mark for urgency."),
        )
        all_clear = "Your subject line looks good!"
    elif section == "preview":
        rules = (
            (lambda f: f['length'] < 20,
             "Add more detail to your preview text."),
            (lambda f: f['emoji_count'] == 0,
             "Try adding an emoji to your preview text."),
        )
        all_clear = "Your preview text is engaging!"
    elif section == "body":
        rules = (
            (lambda f: f['word_count'] < 50,
             "Consider a longer, more detailed email body."),
            (lambda f: f['exclamation_count'] == 0,
             "Try using an exclamation mark to highlight key points."),
        )
        all_clear = "Your email body is well-structured!"
    else:
        return ""

    # Rules are evaluated lazily, in order; the first match wins.
    for triggered, advice in rules:
        if triggered(features):
            return advice
    return all_clear

def _parse_send_hour(send_time, default=9):
    """Return the hour (0-23) encoded in *send_time*, or *default* if unparseable."""
    if not isinstance(send_time, str):
        return default
    cleaned = send_time.strip()
    # 12-hour with or without a space before AM/PM, then plain 24-hour.
    for fmt in ('%I:%M %p', '%I:%M%p', '%H:%M'):
        try:
            return datetime.strptime(cleaned, fmt).hour
        except ValueError:
            continue
    return default


def predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric):
    """Predict one engagement metric for an email, as a percentage.

    Args:
        subject: Subject line text.
        preview_text: Preview text.
        body_text: Email body. Accepted for interface symmetry; it is not part
            of the feature vector built here.
        day_of_week: Day name, encoded with the loaded ``day_encoder``
            (falls back to 0 when it cannot be encoded).
        send_time: Send time such as ``"9:00 AM"``; ``"9:00AM"`` and 24-hour
            ``"14:30"`` are accepted too. Unparseable values default to hour 9.
        target_metric: Key into the loaded ``models`` mapping, e.g.
            ``'open_rate'``, ``'click_rate'`` or ``'unsubscribe_rate'``.

    Returns:
        The prediction clamped to a per-metric ceiling (100% for open rate,
        50% for click rate, 10% otherwise) and expressed as a percentage.
        If anything fails -- including missing model artifacts -- the error is
        printed and the default value 2.5 is returned.
    """
    text_feature_keys = (
        'length', 'word_count', 'exclamation_count', 'question_count',
        'emoji_count', 'number_count', 'caps_ratio',
    )
    try:
        # Only subject and preview text contribute text features.
        subject_features = extract_text_features(subject)
        preview_features = extract_text_features(preview_text)

        send_hour = _parse_send_hour(send_time)

        # Encode the day; an unknown label or an unavailable encoder falls
        # back to 0 (deliberate best-effort).
        try:
            day_encoded = day_encoder.transform([day_of_week])[0]
        except Exception:
            day_encoded = 0

        # The audience-size and list slots are fixed placeholders, kept so the
        # vector layout stays compatible with the trained model.
        features = [
            500000,  # Placeholder for audience size
            send_hour,
            day_encoded,
            0,  # Placeholder for list
        ]
        for feats in (subject_features, preview_features):
            features.extend(feats[key] for key in text_feature_keys)

        # Truncate or zero-pad to the number of inputs the scaler/model expect.
        expected = len(feature_names)
        if len(features) > expected:
            features = features[:expected]
        else:
            features.extend([0] * (expected - len(features)))

        features_scaled = scaler.transform([features])
        prediction = models[target_metric].predict(features_scaled)[0]

        # Clamp to a plausible fraction for the metric, then convert to percent.
        ceiling = {'open_rate': 1, 'click_rate': 0.5}.get(target_metric, 0.1)
        return max(0, min(ceiling, prediction)) * 100

    except Exception as e:
        # UI boundary: never let a prediction failure crash the request.
        print(f"Prediction error: {e}")
        return 2.5  # Default prediction

def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_time, target_metric):
    """Run the full analysis for one email and build the Markdown report.

    Scores each section, collects per-section suggestions, predicts the
    requested metric, and runs sentiment analysis and zero-shot content
    classification over the combined text.

    Args:
        subject: Subject line text.
        preview_text: Preview text.
        body_text: Email body text.
        day_of_week: Day the email is sent.
        send_time: Send time as entered by the user.
        target_metric: ``'open_rate'``, ``'click_rate'`` or
            ``'unsubscribe_rate'``.

    Returns:
        A ``(markdown_report, context)`` tuple. ``context`` is a dict holding
        the inputs, scores, suggestions, sentiment result and classification
        result, for later use by the chatbot.
    """
    # Section features and scores
    subject_features = extract_text_features(subject)
    preview_features = extract_text_features(preview_text)
    body_features = extract_text_features(body_text)

    subject_score = section_score(subject_features)
    preview_score = section_score(preview_features)
    body_score = section_score(body_features)

    # Section suggestions
    subject_sugg = section_suggestion("subject", subject_features)
    preview_sugg = section_suggestion("preview", preview_features)
    body_sugg = section_suggestion("body", body_features)

    # Overall performance score: weighted average, subject weighted highest.
    performance_score = int(round(0.4 * subject_score + 0.3 * preview_score + 0.3 * body_score))

    # Predicted metric
    predicted_value = predict_email_performance(
        subject, preview_text, body_text, day_of_week, send_time, target_metric
    )

    text_for_sentiment = f"{subject}\n{preview_text}\n{body_text}"
    # truncation=True: the sentiment model only accepts a limited number of
    # tokens, and without truncation a long email body makes the call raise.
    sentiment_result = sentiment(text_for_sentiment, truncation=True)[0]

    # Zero-shot classification
    classification_result = classifier(text_for_sentiment, classification_labels)

    # Human-readable metric name; an unexpected key is shown verbatim instead
    # of raising.
    metric_label = {
        "open_rate": "Open Rate",
        "click_rate": "Click Rate",
        "unsubscribe_rate": "Unsubscribe Rate"
    }.get(target_metric, str(target_metric))

    # Two trailing spaces are a Markdown hard line break; kept as an explicit
    # constant so editors cannot silently strip them.
    md_break = "  "

    # Only the first six classification entries are shown.
    top_classes = zip(classification_result['labels'][:6], classification_result['scores'][:6])

    report_lines = [
        "",
        f"## 📊 Performance Score: {performance_score}/100",
        "",
        f"### 🎯 Predicted {metric_label}: {predicted_value:.2f}%",
        "",
        "### ✂️ Section Scores & Suggestions",
        f"- **Subject Line:** {subject_score}/100{md_break}",
        f"  _Suggestion: {subject_sugg}_",
        f"- **Preview Text:** {preview_score}/100{md_break}",
        f"  _Suggestion: {preview_sugg}_",
        f"- **Body Text:** {body_score}/100{md_break}",
        f"  _Suggestion: {body_sugg}_",
        "",
        "### 📈 Sentiment Analysis",
        f"- **Sentiment:** {sentiment_result['label']} (confidence: {sentiment_result['score']:.2f})",
        "",
        "### 🏷️ Content Classification",
    ]
    report_lines.extend(f"- **{label.title()}**: {score:.2f}" for label, score in top_classes)
    report_lines.extend([
        "",
        "### 📋 Email Details",
        f"- **Subject Length:** {subject_features['length']} characters",
        f"- **Preview Length:** {preview_features['length']} characters",
        f"- **Body Word Count:** {body_features['word_count']} words",
        f"- **Send Time:** {send_time} on {day_of_week}",
        "",
        "---",
        "#### 💬 Ask the Email Optimization Chatbot below for advice!",
        "",
    ])
    output = "\n".join(report_lines)

    # Context handed to the chatbot so it can answer questions about this analysis.
    context = {
        "subject": subject,
        "preview_text": preview_text,
        "body_text": body_text,
        "day_of_week": day_of_week,
        "send_time": send_time,
        "target_metric": target_metric,
        "scores": {
            "performance_score": performance_score,
            "subject_score": subject_score,
            "preview_score": preview_score,
            "body_score": body_score,
            "predicted_value": predicted_value
        },
        "suggestions": {
            "subject": subject_sugg,
            "preview": preview_sugg,
            "body": body_sugg
        },
        "sentiment": sentiment_result,
        "classification": classification_result
    }

    return output, context

def chatbot_response(user_message, history, context):
    """Answer a user question about the most recently analysed email.

    Args:
        user_message: The question typed into the chat box.
        history: Chat history supplied by the chat interface; not used here.
        context: The context dict produced by ``analyze_email_complete``, or a
            falsy / non-dict value if no analysis has been run yet.

    Returns:
        The generated answer, or a fixed fallback message when there is no
        analysis context, generation fails, or the model returns nothing.
    """
    # Check if context exists
    if not context or not isinstance(context, dict):
        return "Please analyze an email first, then ask your question here."

    try:
        scores = context.get('scores', {})
        suggestions = context.get('suggestions', {})
        top_labels = context.get('classification', {}).get('labels', [])[:3]

        # The tokenizer truncates the END of the prompt, which is where the
        # user's question and the instruction live. Cap the email body so a
        # long email cannot push them past the token limit.
        max_body_chars = 2000
        body_text = context.get('body_text', 'N/A')
        if isinstance(body_text, str) and len(body_text) > max_body_chars:
            body_text = body_text[:max_body_chars] + "... [truncated]"

        # Compose prompt for Flan-T5
        prompt = f"""You are an expert email marketing assistant. Here is the analysis of an email campaign:

Subject: {context.get('subject', 'N/A')}
Preview: {context.get('preview_text', 'N/A')}
Body: {body_text}
Day: {context.get('day_of_week', 'N/A')}
Send Time: {context.get('send_time', 'N/A')}
Target Metric: {context.get('target_metric', 'N/A')}

Performance Score: {scores.get('performance_score', 'N/A')}/100
Subject Score: {scores.get('subject_score', 'N/A')}/100
Preview Score: {scores.get('preview_score', 'N/A')}/100
Body Score: {scores.get('body_score', 'N/A')}/100
Predicted Value: {scores.get('predicted_value', 'N/A')}%

Current Suggestions:
- Subject: {suggestions.get('subject', 'N/A')}
- Preview: {suggestions.get('preview', 'N/A')}
- Body: {suggestions.get('body', 'N/A')}

Sentiment: {context.get('sentiment', {}).get('label', 'N/A')}
Top Classifications: {', '.join(top_labels)}

User question: {user_message}

Give a specific, actionable answer based on the above analysis. Be concise and practical."""

        # Generate response (sampling, so answers vary between calls)
        inputs = chatbot_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
        outputs = chatbot_model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
        answer = chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Remove the original prompt from the answer if it's included
        if prompt in answer:
            answer = answer.replace(prompt, "").strip()

        return answer if answer else "I'm sorry, I couldn't generate a response. Please try rephrasing your question."

    except Exception as e:
        # UI boundary: report the problem and keep the chat usable.
        print(f"Chatbot error: {e}")
        return "I'm having trouble generating a response right now. Please try again."

# Available options for the "Day of Week" dropdown
day_options = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Create Gradio interface: an input form on the left, the Markdown report on
# the right, and a chatbot underneath that reads the stored analysis context.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🚀 Email Performance Predictor – Forks Over Knives

        Predict your email’s open, click, and unsubscribe rates.
        Get actionable, section-specific suggestions, content classification, and optimization advice from the chatbot below!
        """
        )
    with gr.Row():
        # Left column: email inputs and the analyze button
        with gr.Column():
            subject = gr.Textbox(label="📧 Subject Line", placeholder="Enter your email subject line")
            preview_text = gr.Textbox(label="👀 Preview Text", placeholder="Enter preview text (optional)")
            body_text = gr.Textbox(label="📝 Email Body", placeholder="Paste your email body here")
            day_of_week = gr.Dropdown(choices=day_options, label="📅 Day of Week", value="Thursday")
            # Free-text time, e.g. "9:00 AM"; predict_email_performance falls
            # back to hour 9 when it cannot parse the value.
            send_time = gr.Textbox(label="⏰ Send Time", placeholder="9:00 AM", value="9:00 AM")
            # Choices match the metric keys handled by analyze_email_complete.
            target_metric = gr.Radio(choices=['open_rate', 'click_rate', 'unsubscribe_rate'],
                                     label="🎯 Target Metric", value='click_rate')
            analyze_btn = gr.Button("Analyze Email")
        # Right column: rendered analysis report
        with gr.Column():
            analysis_output = gr.Markdown()

    # State to store context: written by the analyze button below and passed
    # to chatbot_response as its third argument.
    state = gr.State()

    # Chatbot interface
    chatbot = gr.ChatInterface(
        fn=chatbot_response,
        additional_inputs=[state],
        title="Email Optimization Chatbot",
        description="Ask for advice on how to improve your email based on the analysis above."
    )

    # Connect the analyze button: the returned (report, context) pair fills
    # the Markdown panel and the shared state respectively.
    analyze_btn.click(
        analyze_email_complete,
        inputs=[subject, preview_text, body_text, day_of_week, send_time, target_metric],
        outputs=[analysis_output, state]
    )

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()