import gradio as gr
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

# Hugging Face Hub repository id shared by the image processor and the model.
_MODEL_ID = "Dc-4nderson/vit-emotion-classifier"

# Both objects are created once, at import time, and reused by every call to
# the inference function below.
processor = AutoImageProcessor.from_pretrained(_MODEL_ID)
model = AutoModelForImageClassification.from_pretrained(_MODEL_ID)

# Inference function
def classify_emotion(image: "Image.Image | None") -> str:
    """Classify the emotion shown in an image and format it for display.

    Args:
        image: The picture to classify, or ``None`` when the Gradio image
            component was submitted without a picture.

    Returns:
        A display string containing the label that ``model.config.id2label``
        maps the highest-scoring class index to, or a short prompt asking
        for a picture when ``image`` is ``None``.
    """
    # Gradio hands the function None when nothing was uploaded; without this
    # guard the .convert() call below fails with AttributeError.
    if image is None:
        return "Please upload or take a picture first."

    # Ensure image is in RGB mode (uploads may be grayscale, RGBA, etc.)
    rgb_image = image.convert("RGB")

    # Preprocess and predict; no_grad skips autograd bookkeeping, which is
    # not needed for inference.
    inputs = processor(images=rgb_image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        pred = torch.argmax(outputs.logits, dim=1).item()

    label = model.config.id2label[pred]
    return f"🧠 Predicted Emotion: **{label}**"

# Gradio interface
# The components are built first so the Interface call itself stays short.
# type="pil" makes Gradio pass the upload to the handler as a PIL image.
_image_input = gr.Image(type="pil", label="Upload or Take a Picture")

# NOTE(review): classify_emotion wraps the label in Markdown bold markers
# ("**...**"); a Textbox presumably shows those asterisks literally.
# Consider gr.Markdown here if bold rendering is intended.
_emotion_output = gr.Textbox(label="Predicted Emotion")

demo = gr.Interface(
    fn=classify_emotion,
    inputs=_image_input,
    outputs=_emotion_output,
    title="ViT Emotion Classifier",
    description=(
        "Upload or take a photo to predict emotion using a "
        "Vision Transformer (ViT) model trained by Dc-4nderson."
    ),
    # NOTE(review): newer Gradio releases appear to name this option
    # `flagging_mode`; confirm against the installed version.
    allow_flagging="never",
)

# Start the web UI only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    demo.launch()