# ==========================================
# EMOTION DETECTION WEB APP
# Model: koyelog/face
# Backend + Frontend with Gradio
# ==========================================
import gradio as gr
import torch
from transformers import ViTForImageClassification, ViTImageProcessor
from PIL import Image
import numpy as np
import os
print("="*70)
print("š AI EMOTION DETECTOR - INITIALIZING")
print("="*70)
# ===== CONFIGURATION =====
MODEL_ID = "koyelog/face"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"\nš¦ Model ID: {MODEL_ID}")
print(f"š„ļø Device: {DEVICE}")
print(f"š¾ PyTorch Version: {torch.__version__}")
# ===== LOAD MODEL & PROCESSOR =====
print("\nā³ Loading model from HuggingFace...")
try:
    model = ViTForImageClassification.from_pretrained(
        MODEL_ID,
        cache_dir="./model_cache"
    )
    processor = ViTImageProcessor.from_pretrained(
        MODEL_ID,
        cache_dir="./model_cache"
    )
    model.to(DEVICE)
    model.eval()  # inference mode: disables dropout
    print("✅ Model loaded successfully!")
    print(f"📊 Model Parameters: {sum(p.numel() for p in model.parameters()):,}")
except Exception as e:
    print(f"❌ ERROR loading model: {e}")
    raise
# ===== EMOTION CONFIGURATION =====
EMOTIONS = {
    0: {'name': 'Angry', 'emoji': '😠', 'color': '#ff4444', 'description': 'Showing anger or frustration'},
    1: {'name': 'Disgust', 'emoji': '🤢', 'color': '#44ff44', 'description': 'Expressing disgust or dislike'},
    2: {'name': 'Fear', 'emoji': '😨', 'color': '#9944ff', 'description': 'Showing fear or anxiety'},
    3: {'name': 'Happy', 'emoji': '😊', 'color': '#ffdd44', 'description': 'Expressing happiness or joy'},
    4: {'name': 'Sad', 'emoji': '😢', 'color': '#4444ff', 'description': 'Showing sadness or sorrow'},
    5: {'name': 'Surprise', 'emoji': '😲', 'color': '#ff44ff', 'description': 'Expressing surprise or shock'},
    6: {'name': 'Neutral', 'emoji': '😐', 'color': '#888888', 'description': 'No strong emotion detected'}
}
print(f"\nš Loaded {len(EMOTIONS)} emotion classes:")
for idx, emo in EMOTIONS.items():
print(f" {idx}: {emo['emoji']} {emo['name']}")
# ===== PREDICTION FUNCTION =====
@torch.no_grad()
def predict_emotion(image):
    """
    Predict the emotion shown in an image.

    Args:
        image: PIL Image or numpy array
    Returns:
        results: probability dict for the Gradio Label component
        html: formatted HTML result card
    """
    if image is None:
        return None, """
        <div style="text-align:center; padding:30px;">
            <h3>⚠️ No Image Provided</h3>
            <p>Please upload an image or use the webcam to capture one!</p>
        </div>
        """
    try:
        # Convert numpy to PIL if needed
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        # Convert to RGB
        if image.mode != 'RGB':
            image = image.convert('RGB')
        original_size = image.size
        print(f"\n📸 Processing image: {original_size[0]}x{original_size[1]}")
        # Preprocess
        inputs = processor(images=image, return_tensors="pt")
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
        # Inference
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.nn.functional.softmax(logits, dim=-1)[0].cpu()
        # Get prediction
        predicted_id = torch.argmax(probs).item()
        confidence = probs[predicted_id].item()
        # Get emotion details
        emotion = EMOTIONS[predicted_id]
        print(f"🎯 Prediction: {emotion['emoji']} {emotion['name']}")
        print(f"📊 Confidence: {confidence*100:.2f}%")
        print("🏆 Top 3 emotions:")
        top3_indices = torch.topk(probs, 3).indices
        for idx in top3_indices:
            print(f"   {EMOTIONS[idx.item()]['emoji']} {EMOTIONS[idx.item()]['name']}: {probs[idx]*100:.2f}%")
        # Format results for the Gradio Label component
        results = {
            f"{EMOTIONS[i]['emoji']} {EMOTIONS[i]['name']}": float(probs[i])
            for i in range(len(EMOTIONS))
        }
        # Generate HTML output
        html = generate_result_html(
            emotion['name'],
            emotion['emoji'],
            emotion['color'],
            emotion['description'],
            confidence,
            probs
        )
        return results, html
    except Exception as e:
        print(f"❌ ERROR during prediction: {e}")
        import traceback
        traceback.print_exc()
        error_html = f"""
        <div style="text-align:center; padding:30px;">
            <h3>❌ Prediction Error</h3>
            <p>{str(e)}</p>
            <p>Please try a different image.</p>
        </div>
        """
        return None, error_html
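# Quick smoke test outside Gradio (assumption: a local file named
# 'test_face.jpg' sits next to this script). Uncomment to verify the
# model pipeline before launching the UI:
# if os.path.exists("test_face.jpg"):
#     test_results, _ = predict_emotion(Image.open("test_face.jpg"))
#     print(test_results)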
# ===== HTML GENERATOR =====
def generate_result_html(name, emoji, color, description, confidence, probs):
    """Generate the HTML result display."""
    # Build one bar per emotion, sorted by probability (highest first).
    # NOTE: the inline-styled markup is a minimal layout; colors and bar
    # widths come from the EMOTIONS config and the model probabilities.
    bars_html = ""
    for idx in sorted(range(len(EMOTIONS)), key=lambda i: probs[i], reverse=True):
        emo = EMOTIONS[idx]
        prob = probs[idx].item()
        percentage = prob * 100
        bar_width = min(percentage, 100)
        bars_html += f"""
        <div style="display:flex; align-items:center; margin:6px 0;">
            <span style="width:130px; text-align:left;">{emo['emoji']} {emo['name']}</span>
            <div style="flex:1; background:#eee; border-radius:6px;">
                <div style="width:{bar_width:.1f}%; background:{emo['color']}; border-radius:6px; height:14px;"></div>
            </div>
            <span style="width:60px; text-align:right;">{percentage:.1f}%</span>
        </div>
        """
    # Main result card
    html = f"""
    <div style="text-align:center; padding:20px;">
        <div style="font-size:64px;">{emoji}</div>
        <h2 style="color:{color}; margin:8px 0;">{name}</h2>
        <p>{description}</p>
        <p><b>Confidence:</b> {confidence*100:.1f}%</p>
        <h3>📊 Detailed Emotion Analysis</h3>
        {bars_html}
        <p style="font-size:12px; color:#888;">
            Model: koyelog/face (Vision Transformer) |
            Accuracy: 98.80% |
            Parameters: 85.8M
        </p>
    </div>
    """
    return html
# ===== GRADIO INTERFACE =====
print("\nšØ Building Gradio interface...")
# Custom CSS
custom_css = """
.gradio-container {
font-family: 'Segoe UI', -apple-system, BlinkMacSystemFont, sans-serif !important;
max-width: 1400px !important;
}
.main-header {
text-align: center;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 60px 30px;
border-radius: 25px;
margin-bottom: 40px;
box-shadow: 0 15px 50px rgba(102, 126, 234, 0.3);
}
.tab-nav button {
font-size: 18px !important;
font-weight: 600 !important;
padding: 18px 30px !important;
}
.gr-button-primary {
background: linear-gradient(135deg, #667eea, #764ba2) !important;
border: none !important;
font-size: 18px !important;
font-weight: 600 !important;
padding: 16px 40px !important;
border-radius: 12px !important;
transition: all 0.3s ease !important;
}
.gr-button-primary:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4) !important;
}
footer {
visibility: hidden !important;
}
"""
# Create Gradio Interface
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="purple",
        secondary_hue="pink",
        font=gr.themes.GoogleFont("Inter")
    ),
    css=custom_css,
    title="🎭 AI Emotion Detector | koyelog",
    analytics_enabled=False
) as demo:
    # Header
    gr.HTML("""
    <div class="main-header">
        <h1>🎭 AI Emotion Detector</h1>
        <p>Powered by Vision Transformer | 98.80% Validation Accuracy</p>
        <p>Model: koyelog/face | 85.8M Parameters | Real-time Detection</p>
        <p>😠 Angry &nbsp; 🤢 Disgust &nbsp; 😨 Fear &nbsp; 😊 Happy &nbsp; 😢 Sad &nbsp; 😲 Surprise &nbsp; 😐 Neutral</p>
    </div>
    """)
    with gr.Tabs():
        # TAB 1: WEBCAM
        with gr.Tab("📹 Live Webcam Detection"):
            gr.Markdown("""
            ### 🎥 Capture Your Emotion in Real-Time
            Click the camera button to capture your face and instantly detect your emotion!
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    webcam_input = gr.Image(
                        sources=["webcam"],
                        type="pil",
                        label="📸 Your Face",
                        streaming=False,
                        mirror_webcam=True
                    )
                    webcam_button = gr.Button(
                        "🔍 Detect My Emotion",
                        variant="primary",
                        size="lg",
                        scale=1
                    )
                with gr.Column(scale=1):
                    webcam_html = gr.HTML(label="🎯 Emotion Result")
                    webcam_label = gr.Label(
                        label="📊 Emotion Probabilities",
                        num_top_classes=7
                    )
            webcam_button.click(
                fn=predict_emotion,
                inputs=webcam_input,
                outputs=[webcam_label, webcam_html]
            )
        # TAB 2: UPLOAD
        with gr.Tab("🖼️ Upload Image"):
            gr.Markdown("""
            ### 📤 Upload or Drag & Drop a Face Image
            Supports JPG/JPEG and PNG formats. Best results with front-facing, well-lit photos!
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(
                        type="pil",
                        label="🖼️ Upload Face Image",
                        sources=["upload", "clipboard"]
                    )
                    image_button = gr.Button(
                        "🔍 Detect Emotion",
                        variant="primary",
                        size="lg"
                    )
                with gr.Column(scale=1):
                    image_html = gr.HTML(label="🎯 Emotion Result")
                    image_label = gr.Label(
                        label="📊 Emotion Probabilities",
                        num_top_classes=7
                    )
            image_button.click(
                fn=predict_emotion,
                inputs=image_input,
                outputs=[image_label, image_html]
            )
    # Footer
    gr.HTML("""
    <div style="text-align:center; padding:20px; color:#666;">
        <h3>📊 Model Information</h3>
        <p><b>Architecture:</b> Vision Transformer (ViT)</p>
        <p><b>Accuracy:</b> Train: 99.29% | Val: 98.80%</p>
        <p><b>Training Details:</b>
           Dataset: 181,230 images across 7 emotion categories |
           Training: 20 epochs on dual T4 GPUs |
           Best Epoch: 20/20 (Val Acc: 98.80%) |
           License: MIT
        </p>
        <p>⚠️ Best Results: Front-facing photos | Good lighting | Single face | Clear expressions</p>
        <p>Created by Koyeliya Ghosh</p>
        <p><a href="https://huggingface.co/koyelog/face" target="_blank">View Model on HuggingFace →</a></p>
    </div>
    """)
# ===== LAUNCH =====
if __name__ == "__main__":
print("\n" + "="*70)
print("š LAUNCHING EMOTION DETECTION APP")
print("="*70)
print("ā
Model loaded and ready")
print("ā
Gradio interface built")
print("ā
Starting server...\n")
demo.launch(
server_name="0.0.0.0",
server_port=7860,
share=False,
show_error=True,
show_api=True
)
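    # NOTE: two common variations, depending on where this runs: calling
    # demo.queue() before launch() serializes GPU inference under concurrent
    # requests, and share=True generates a temporary public link (useful in
    # notebooks or behind a firewall).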