"""Gradio app: detect a facial emotion from a photo and generate a
tone-specific icebreaker line with a small language model."""

import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import gradio as gr
from transformers import pipeline

# Class labels, index-aligned with the trained classifier's output logits.
# NOTE(review): this ordering differs from the common FER-2013 ordering
# ('Angry','Disgust','Fear','Happy','Sad','Surprise','Neutral') — confirm it
# matches the label encoding used when training emotion_model.pth.
classes = ['Angry', 'Disgust', 'Fear', 'Sad', 'Surprise', 'Neutral', 'Happy']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class EmotionModel(nn.Module):
    """Small CNN mapping a 48x48 grayscale face to 7 emotion logits.

    The architecture must mirror the one used for training so the saved
    state_dict in emotion_model.pth loads without key/shape mismatches.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),              # 48x48 -> 24x24
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),              # 24x24 -> 12x12
            nn.Flatten(),
            nn.Linear(64 * 12 * 12, 128),    # 64 filters * 12x12 = 9216 features
            nn.ReLU(),
            nn.Linear(128, 7),
        )

    def forward(self, x):
        return self.model(x)


# Load trained weights and freeze the network for inference.
model = EmotionModel().to(device)
model.load_state_dict(torch.load("emotion_model.pth", map_location=device))
model.eval()

# Preprocessing: grayscale, 48x48, tensor normalized to [-1, 1].
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Text-generation pipeline used to draft the icebreaker line.
gen = pipeline("text-generation", model="distilgpt2")

# Tone-specific lead-in text per detected emotion (emojis included).
templates = {
    "Friendly": {
        "Happy": "You seem cheerful 😊! ",
        "Sad": "You look down 😒. Here's something to cheer you up... ",
        "Angry": "Feeling heated 😠? Let's shift the vibe... ",
        "Fear": "A little nervous 😨? No worries, try this... ",
        "Disgust": "Something bugging you 🀒?",
        "Surprise": "Whoa! 😲 That was unexpected?",
        "Neutral": "Keeping it chill 😐?",
    },
    "Professional": {
        "Happy": "You look ready to go 😊.",
        "Sad": "You seem thoughtful 😒.",
        "Angry": "Tense vibes 😠.",
        "Fear": "Unsure? 😨",
        "Disgust": "Hey! Reset your focus 🀒",
        "Surprise": "Unexpected moment 😲?",
        "Neutral": "Yo! Are you ready for this 😐?",  # fixed typo: "redy"
    },
    "Funny": {
        "Happy": "You’re all smiles 😊!",
        "Sad": "You need a laugh 😒.",
        "Angry": "Cool it down 😠",
        "Fear": "Let’s laugh the fear away 😨: ",
        "Disgust": "Weird day 🀒?",
        "Surprise": "Surprise! 😲",
        "Neutral": "Let’s wake things up 😐 with a joke. ",
    },
}

# Single source of truth for emotion -> emoji. The original defined this
# dict twice with conflicting values ("Happy": "πŸ˜„" vs "😊"); the copy
# inside the function shadowed the module-level one, so that copy's
# values are the ones kept here.
emotion_emojis = {
    "Happy": "😊",
    "Sad": "😒",
    "Angry": "😠",
    "Fear": "😨",
    "Disgust": "🀒",
    "Surprise": "😲",
    "Neutral": "😐",
}


def predict_emotion_and_icebreaker(image, tone):
    """Classify the emotion in *image* and generate a tone-matched icebreaker.

    Args:
        image: HxWxC numpy array from the Gradio image input.
        tone: one of "Friendly", "Professional", "Funny".

    Returns:
        Formatted string reporting the detected emotion and a generated line.
    """
    # Preprocess and classify the face.
    pil_image = Image.fromarray(image).convert("RGB")
    tensor = transform(pil_image).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(tensor)
    emotion = classes[output.argmax(dim=1).item()]

    emoji = emotion_emojis[emotion]
    prompt = f"{templates[tone][emotion]} {emoji} Try saying:"

    # Sample a short continuation, then strip the prompt text back off and
    # keep only the first line of the continuation.
    generated = gen(
        prompt,
        max_length=40,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_k=50,
    )[0]['generated_text']
    response = generated.replace(prompt, "").strip().split("\n")[0]

    # Fallback if the model produced nothing usable. (In the original this
    # check lived in unreachable dead code after the return statement.)
    if not response:
        response = "Here's a fun question to get started: What's your hidden talent?"

    return f"🧠 Emotion Detected: {emotion} {emoji}\nπŸ’¬ Icebreaker ({tone}):\n{response}"


# Gradio UI wiring.
webcam_input = gr.Image(type="numpy", label="Upload or Take a Photo")
tone_dropdown = gr.Dropdown(
    choices=["Friendly", "Professional", "Funny"],
    value="Friendly",
    label="Tone",
)

demo = gr.Interface(
    fn=predict_emotion_and_icebreaker,
    inputs=[webcam_input, tone_dropdown],
    outputs="text",
    title="Emotion + Icebreaker Generator",
    description=(
        "Upload or capture a face photo. AI will predict the emotion and "
        "generate a tone-specific conversation starter."
    ),
)

demo.launch()