import gradio as gr
import joblib
import pandas as pd
import numpy as np
import re
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from datetime import datetime

# Load models and preprocessors (if available)
try:
    models = joblib.load('email_quality_models.pkl')
    scaler = joblib.load('feature_scaler.pkl')
    day_encoder = joblib.load('day_encoder.pkl')
    feature_names = joblib.load('feature_names.pkl')
    model_results = joblib.load('model_results.pkl')
    print("✅ Models loaded successfully!")
except Exception as e:
    print(f"❌ Error loading models: {e}")
    # Fix: keep these names defined so a failed load degrades gracefully —
    # predict_email_performance catches exceptions and falls back to a default
    # instead of the whole app dying with NameError at call time.
    models = scaler = day_encoder = feature_names = model_results = None

# Load sentiment analysis pipeline (uses the transformers default model)
sentiment = pipeline("sentiment-analysis")

# Candidate labels for zero-shot content classification
classification_labels = [
    "engaging", "promotional", "informative", "urgent", "personal", "spammy",
    "announcement", "educational", "sales", "boring", "friendly", "exclusive"
]
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Load chatbot model (google/flan-t5-large), falling back to flan-t5-base
try:
    chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
    chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
    print("✅ Chatbot model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading chatbot model: {e}")
    # Fallback to smaller model if large one fails
    try:
        chatbot_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        chatbot_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
        print("✅ Fallback chatbot model loaded successfully!")
    except Exception as e2:
        print(f"❌ Error loading fallback model: {e2}")


def extract_text_features(text):
    """Extract shallow lexical features from one email text section.

    Args:
        text: The section's text (subject, preview, or body). May be
            empty or NaN, in which case all features are zero.

    Returns:
        dict with keys: length, word_count, exclamation_count,
        question_count, emoji_count, number_count, caps_ratio.
    """
    if pd.isna(text) or text == '':
        return {
            'length': 0,
            'word_count': 0,
            'exclamation_count': 0,
            'question_count': 0,
            'emoji_count': 0,
            'number_count': 0,
            'caps_ratio': 0
        }
    return {
        'length': len(text),
        'word_count': len(text.split()),
        'exclamation_count': text.count('!'),
        'question_count': text.count('?'),
        # NOTE(review): this pattern counts ANY character that is not a word
        # char, whitespace, comma, or period as an "emoji" — so ! and ? are
        # double-counted alongside the counters above. Kept as-is because the
        # pickled models were presumably trained with this definition; confirm
        # before tightening it to real Unicode emoji ranges.
        'emoji_count': len(re.findall(r'[^\w\s,.]', text)),
        'number_count': len(re.findall(r'\d+', text)),
        'caps_ratio': sum(1 for c in text if c.isupper()) / len(text) if len(text) > 0 else 0
    }


def section_score(features):
    """Heuristic quality score (0-100) for one email section.

    Starts from a baseline of 50 and rewards emoji, exclamation/question
    punctuation, and a 20-60 character length; each contribution is capped
    and the total is clamped to [0, 100].
    """
    score = 50
    score += min(20, features['emoji_count'] * 10)
    score += min(10, features['exclamation_count'] * 5)
    score += min(10, features['question_count'] * 5)
    if 20 <= features['length'] <= 60:
        score += 10
    score = max(0, min(100, score))
    return score


def section_suggestion(section, features):
    """Return one short, section-specific writing suggestion.

    Args:
        section: One of "subject", "preview", or "body".
        features: Feature dict from extract_text_features().

    Returns:
        A suggestion string; empty string for an unknown section name.
    """
    if section == "subject":
        if features['length'] > 50:
            return "Try shortening your subject line for better impact."
        if features['emoji_count'] == 0:
            return "Add an emoji to make your subject line stand out."
        if features['exclamation_count'] == 0:
            return "Consider adding an exclamation mark for urgency."
        return "Your subject line looks good!"
    elif section == "preview":
        if features['length'] < 20:
            return "Add more detail to your preview text."
        if features['emoji_count'] == 0:
            return "Try adding an emoji to your preview text."
        return "Your preview text is engaging!"
    elif section == "body":
        if features['word_count'] < 50:
            return "Consider a longer, more detailed email body."
        if features['exclamation_count'] == 0:
            return "Try using an exclamation mark to highlight key points."
        return "Your email body is well-structured!"
    return ""
def predict_email_performance(subject, preview_text, body_text, day_of_week, send_time, target_metric):
    """Predict the chosen engagement metric using the pre-trained models.

    Args:
        subject / preview_text / body_text: The email copy sections.
        day_of_week: Weekday name, encoded via the loaded day_encoder.
        send_time: Clock time like "9:00 AM".
        target_metric: 'open_rate', 'click_rate', or 'unsubscribe_rate'.

    Returns:
        Predicted value as a percentage (float); falls back to 2.5 if
        anything fails (missing models, malformed input, ...).
    """
    try:
        # Lexical features for the sections that feed the model.
        # NOTE(review): body text does not contribute to the feature vector —
        # only subject and preview features are appended below.
        subject_features = extract_text_features(subject)
        preview_features = extract_text_features(preview_text)

        # Parse send time ("9:00 AM" style); fall back to 9 AM on bad input.
        try:
            send_hour = datetime.strptime(send_time, '%I:%M %p').hour
        except (ValueError, TypeError):
            send_hour = 9  # Default to 9 AM

        # Encode the weekday; unseen labels or a missing encoder get code 0.
        try:
            day_encoded = day_encoder.transform([day_of_week])[0]
        except Exception:
            day_encoded = 0  # Default encoding

        # Fixed-order feature vector. The two placeholders keep the layout
        # compatible with the trained model, which expects audience-size and
        # list columns.
        features = [
            500000,       # Placeholder for audience size (kept for model compatibility)
            send_hour,
            day_encoded,
            0             # Placeholder for list (kept for model compatibility)
        ]
        for feats in [subject_features, preview_features]:
            for suffix in ['length', 'word_count', 'exclamation_count', 'question_count',
                           'emoji_count', 'number_count', 'caps_ratio']:
                features.append(feats[suffix])

        # Truncate or zero-pad so the vector matches the scaler's expected width.
        if len(features) > len(feature_names):
            features = features[:len(feature_names)]
        elif len(features) < len(feature_names):
            features.extend([0] * (len(feature_names) - len(features)))
        features_scaled = scaler.transform([features])

        # Make prediction with the model trained for the requested metric
        model = models[target_metric]
        prediction = model.predict(features_scaled)[0]

        # Clamp to a plausible per-metric range, then convert to a percentage
        if target_metric == 'open_rate':
            prediction = max(0, min(1, prediction)) * 100
        elif target_metric == 'click_rate':
            prediction = max(0, min(0.5, prediction)) * 100
        else:  # unsubscribe_rate
            prediction = max(0, min(0.1, prediction)) * 100

        return prediction
    except Exception as e:
        print(f"Prediction error: {e}")
        return 2.5  # Default prediction


def analyze_email_complete(subject, preview_text, body_text, day_of_week, send_time, target_metric):
    """Run the full analysis pipeline for one email.

    Returns:
        (output, context): a Markdown report string for display, and a
        context dict stored in Gradio state so the chatbot can answer
        follow-up questions about this specific analysis.
    """
    # Per-section lexical features and heuristic scores
    subject_features = extract_text_features(subject)
    preview_features = extract_text_features(preview_text)
    body_features = extract_text_features(body_text)
    subject_score = section_score(subject_features)
    preview_score = section_score(preview_features)
    body_score = section_score(body_features)

    # Section-specific suggestions
    subject_sugg = section_suggestion("subject", subject_features)
    preview_sugg = section_suggestion("preview", preview_features)
    body_sugg = section_suggestion("body", body_features)

    # Overall performance score: weighted average, subject weighted highest
    performance_score = int(round(0.4 * subject_score + 0.3 * preview_score + 0.3 * body_score))

    # Model-based prediction for the selected metric
    predicted_value = predict_email_performance(subject, preview_text, body_text,
                                                day_of_week, send_time, target_metric)

    # Sentiment analysis and zero-shot classification over the concatenated copy
    text_for_sentiment = f"{subject}\n{preview_text}\n{body_text}"
    sentiment_result = sentiment(text_for_sentiment)[0]
    classification_result = classifier(text_for_sentiment, classification_labels)

    # Format output
    metric_label = {
        "open_rate": "Open Rate",
        "click_rate": "Click Rate",
        "unsubscribe_rate": "Unsubscribe Rate"
    }[target_metric]
    output = f"""
## 📊 Performance Score: {performance_score}/100

### 🎯 Predicted {metric_label}: {predicted_value:.2f}%

### ✂️ Section Scores & Suggestions
- **Subject Line:** {subject_score}/100 _Suggestion: {subject_sugg}_
- **Preview Text:** {preview_score}/100 _Suggestion: {preview_sugg}_
- **Body Text:** {body_score}/100 _Suggestion: {body_sugg}_

### 📈 Sentiment Analysis
- **Sentiment:** {sentiment_result['label']} (confidence: {sentiment_result['score']:.2f})

### 🏷️ Content Classification
"""
    # Top six zero-shot labels with their confidence scores
    for label, score in zip(classification_result['labels'][:6], classification_result['scores'][:6]):
        output += f"- **{label.title()}**: {score:.2f}\n"
    output += f"""
### 📋 Email Details
- **Subject Length:** {subject_features['length']} characters
- **Preview Length:** {preview_features['length']} characters
- **Body Word Count:** {body_features['word_count']} words
- **Send Time:** {send_time} on {day_of_week}

---
#### 💬 Ask the Email Optimization Chatbot below for advice!
"""

    # Context handed to the chatbot so it can ground its answers
    context = {
        "subject": subject,
        "preview_text": preview_text,
        "body_text": body_text,
        "day_of_week": day_of_week,
        "send_time": send_time,
        "target_metric": target_metric,
        "scores": {
            "performance_score": performance_score,
            "subject_score": subject_score,
            "preview_score": preview_score,
            "body_score": body_score,
            "predicted_value": predicted_value
        },
        "suggestions": {
            "subject": subject_sugg,
            "preview": preview_sugg,
            "body": body_sugg
        },
        "sentiment": sentiment_result,
        "classification": classification_result
    }
    return output, context


def chatbot_response(user_message, history, context):
    """Answer a user question about the most recent analysis via Flan-T5.

    Args:
        user_message: The user's question.
        history: Chat history supplied by gr.ChatInterface (unused).
        context: The dict produced by analyze_email_complete, passed in
            through Gradio state; None until an email has been analyzed.

    Returns:
        The model's answer, or a guidance/error message string.
    """
    # Require a prior analysis before answering
    if not context or not isinstance(context, dict):
        return "Please analyze an email first, then ask your question here."
    try:
        scores = context.get('scores', {})
        suggestions = context.get('suggestions', {})
        # Compose a grounded prompt for Flan-T5
        prompt = f"""You are an expert email marketing assistant. Here is the analysis of an email campaign:

Subject: {context.get('subject', 'N/A')}
Preview: {context.get('preview_text', 'N/A')}
Body: {context.get('body_text', 'N/A')}
Day: {context.get('day_of_week', 'N/A')}
Send Time: {context.get('send_time', 'N/A')}
Target Metric: {context.get('target_metric', 'N/A')}

Performance Score: {scores.get('performance_score', 'N/A')}/100
Subject Score: {scores.get('subject_score', 'N/A')}/100
Preview Score: {scores.get('preview_score', 'N/A')}/100
Body Score: {scores.get('body_score', 'N/A')}/100
Predicted Value: {scores.get('predicted_value', 'N/A')}%

Current Suggestions:
- Subject: {suggestions.get('subject', 'N/A')}
- Preview: {suggestions.get('preview', 'N/A')}
- Body: {suggestions.get('body', 'N/A')}

Sentiment: {context.get('sentiment', {}).get('label', 'N/A')}
Top Classifications: {', '.join(context.get('classification', {}).get('labels', [])[:3])}

User question: {user_message}

Give a specific, actionable answer based on the above analysis. Be concise and practical."""

        # Generate a sampled response; the tokenizer truncates long prompts.
        inputs = chatbot_tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
        outputs = chatbot_model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
        answer = chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Defensive: strip an echoed prompt if the model repeats it
        if prompt in answer:
            answer = answer.replace(prompt, "").strip()

        return answer if answer else "I'm sorry, I couldn't generate a response. Please try rephrasing your question."
    except Exception as e:
        print(f"Chatbot error: {e}")
        return "I'm having trouble generating a response right now. Please try again."
# Weekday choices for the send-day dropdown
day_options = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Build the Gradio app: inputs on the left, analysis report on the right,
# and a context-aware chatbot underneath.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# 🚀 Email Performance Predictor – Forks Over Knives
Predict your email’s open, click, and unsubscribe rates. Get actionable, section-specific suggestions, content classification, and optimization advice from the chatbot below!
"""
    )
    with gr.Row():
        with gr.Column():
            # Email content and scheduling inputs
            subject = gr.Textbox(label="📧 Subject Line", placeholder="Enter your email subject line")
            preview_text = gr.Textbox(label="👀 Preview Text", placeholder="Enter preview text (optional)")
            body_text = gr.Textbox(label="📝 Email Body", placeholder="Paste your email body here")
            day_of_week = gr.Dropdown(choices=day_options, label="📅 Day of Week", value="Thursday")
            send_time = gr.Textbox(label="⏰ Send Time", placeholder="9:00 AM", value="9:00 AM")
            target_metric = gr.Radio(
                choices=['open_rate', 'click_rate', 'unsubscribe_rate'],
                label="🎯 Target Metric",
                value='click_rate'
            )
            analyze_btn = gr.Button("Analyze Email")
        with gr.Column():
            # Markdown report produced by analyze_email_complete
            analysis_output = gr.Markdown()

    # Holds the analysis context dict so the chatbot can reference it
    state = gr.State()

    # Chatbot wired to the stored analysis context
    chatbot = gr.ChatInterface(
        fn=chatbot_response,
        additional_inputs=[state],
        title="Email Optimization Chatbot",
        description="Ask for advice on how to improve your email based on the analysis above."
    )

    # Run the full analysis and refresh both the report and the chatbot context
    analyze_btn.click(
        analyze_email_complete,
        inputs=[subject, preview_text, body_text, day_of_week, send_time, target_metric],
        outputs=[analysis_output, state]
    )

if __name__ == "__main__":
    demo.launch()