Spaces:
Sleeping
Sleeping
SakibRumu
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,52 +1,49 @@
|
|
| 1 |
import torch
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
-
from transformers import AutoModel
|
| 4 |
from PIL import Image
|
| 5 |
from torchvision import transforms
|
| 6 |
|
| 7 |
-
#
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
# Load the model
|
| 15 |
-
model
|
| 16 |
|
| 17 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 19 |
model.to(device)
|
| 20 |
|
| 21 |
-
#
|
| 22 |
model.eval()
|
| 23 |
|
| 24 |
-
# Image Preprocessing (e.g., resizing and normalization)
|
| 25 |
-
preprocess = transforms.Compose([
|
| 26 |
-
transforms.Resize((224, 224)), # Resize to the expected input size
|
| 27 |
-
transforms.ToTensor(),
|
| 28 |
-
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Standard ImageNet normalization
|
| 29 |
-
])
|
| 30 |
-
|
| 31 |
-
# Prediction function
|
| 32 |
-
def predict_emotion(image):
|
| 33 |
-
image = Image.fromarray(image) # Convert NumPy array to PIL Image
|
| 34 |
-
image = preprocess(image).unsqueeze(0).to(device) # Preprocess and add batch dimension
|
| 35 |
-
|
| 36 |
-
with torch.no_grad():
|
| 37 |
-
outputs = model(image)
|
| 38 |
-
_, predicted = torch.max(outputs, 1) # Get the class with the highest probability
|
| 39 |
-
|
| 40 |
-
# Assuming you have an emotion label list
|
| 41 |
-
emotion_labels = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
|
| 42 |
-
predicted_label = emotion_labels[predicted.item()]
|
| 43 |
-
|
| 44 |
-
# Confidence is the probability of the predicted class
|
| 45 |
-
confidence = torch.nn.functional.softmax(outputs, dim=1)
|
| 46 |
-
predicted_confidence = confidence[0, predicted.item()].item()
|
| 47 |
-
|
| 48 |
-
return predicted_label, round(predicted_confidence * 100, 2)
|
| 49 |
-
|
| 50 |
# Custom CSS for layout styling
|
| 51 |
css = """
|
| 52 |
body {
|
|
@@ -55,26 +52,22 @@ body {
|
|
| 55 |
font-family: Arial, sans-serif;
|
| 56 |
padding: 20px;
|
| 57 |
}
|
| 58 |
-
|
| 59 |
#component-1 {
|
| 60 |
background-color: rgba(255, 255, 255, 0.7);
|
| 61 |
padding: 20px;
|
| 62 |
border-radius: 10px;
|
| 63 |
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
|
| 64 |
}
|
| 65 |
-
|
| 66 |
#component-2 {
|
| 67 |
color: black;
|
| 68 |
font-weight: bold;
|
| 69 |
}
|
| 70 |
-
|
| 71 |
#title {
|
| 72 |
color: white;
|
| 73 |
font-size: 36px;
|
| 74 |
font-weight: bold;
|
| 75 |
text-align: center;
|
| 76 |
}
|
| 77 |
-
|
| 78 |
#description {
|
| 79 |
color: white;
|
| 80 |
font-size: 16px;
|
|
@@ -83,6 +76,29 @@ body {
|
|
| 83 |
}
|
| 84 |
"""
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
# Gradio Interface
|
| 87 |
iface = gr.Interface(
|
| 88 |
fn=predict_emotion,
|
|
|
|
| 1 |
import torch
|
| 2 |
+
import timm
|
| 3 |
+
import torch.nn as nn
|
| 4 |
import gradio as gr
|
|
|
|
| 5 |
from PIL import Image
|
| 6 |
from torchvision import transforms
|
| 7 |
|
| 8 |
+
# Define your custom model architecture (HybridCNNTransformer in this case)
|
| 9 |
+
class HybridCNNTransformer(nn.Module):
    """Hybrid CNN + Transformer classifier for 7-way facial emotion recognition.

    A ResNet-50 backbone extracts image features, a Transformer refines them,
    and a linear head produces per-class logits.
    """

    def __init__(self, num_classes=7):
        """
        Args:
            num_classes: Number of emotion classes (default 7).
        """
        super(HybridCNNTransformer, self).__init__()

        # ResNet-50 from timm as the CNN feature extractor. The default
        # classification head is kept so checkpoints saved from this exact
        # architecture still load; forward() bypasses it via forward_features.
        self.backbone = timm.create_model('resnet50', pretrained=True)

        # Transformer over the 2048-dim pooled backbone features.
        self.transformer = nn.Transformer(d_model=2048, nhead=8, num_encoder_layers=6)

        # Final fully connected layer (7 classes for emotion recognition).
        self.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Return (batch, num_classes) logits for a (batch, 3, H, W) image tensor."""
        # BUG FIX: the original called self.backbone(x), whose default head
        # emits 1000-dim class logits — incompatible with d_model=2048, so the
        # transformer call would raise at runtime. Take the raw feature maps
        # and global-average-pool them down to the expected 2048 dimensions.
        feats = self.backbone.forward_features(x)   # (B, 2048, h, w)
        feats = feats.mean(dim=(2, 3))              # global average pool -> (B, 2048)

        # nn.Transformer (batch_first=False) expects (seq, batch, d_model);
        # treat each image as a length-1 sequence so samples in a batch do not
        # attend to each other.
        seq = feats.unsqueeze(0)                    # (1, B, 2048)
        encoded = self.transformer(seq, seq)        # (1, B, 2048)

        # Final classification layer -> (B, num_classes).
        return self.fc(encoded.squeeze(0))
|
| 32 |
|
| 33 |
+
# ---- Model loading ----------------------------------------------------------
# Select the device first so the checkpoint tensors are materialized directly
# on the device the model will run on, instead of loading to CPU and copying.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the architecture the checkpoint was saved from.
model = HybridCNNTransformer(num_classes=7)

# Path to the trained weights (a .pth state dict).
model_path = "transformer_emotion_recognition_model.pth"

# weights_only=True makes torch.load refuse arbitrary pickled objects in the
# checkpoint — safer when the .pth file comes from an untrusted source.
# (Requires torch >= 1.13; NOTE(review): confirm the Space's torch version.)
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))

# Move the model to the selected device (no-op for tensors already there).
model.to(device)

# Inference-only: disable dropout and use running batch-norm statistics.
model.eval()
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# Custom CSS for layout styling
|
| 48 |
css = """
|
| 49 |
body {
|
|
|
|
| 52 |
font-family: Arial, sans-serif;
|
| 53 |
padding: 20px;
|
| 54 |
}
|
|
|
|
| 55 |
#component-1 {
|
| 56 |
background-color: rgba(255, 255, 255, 0.7);
|
| 57 |
padding: 20px;
|
| 58 |
border-radius: 10px;
|
| 59 |
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
|
| 60 |
}
|
|
|
|
| 61 |
#component-2 {
|
| 62 |
color: black;
|
| 63 |
font-weight: bold;
|
| 64 |
}
|
|
|
|
| 65 |
#title {
|
| 66 |
color: white;
|
| 67 |
font-size: 36px;
|
| 68 |
font-weight: bold;
|
| 69 |
text-align: center;
|
| 70 |
}
|
|
|
|
| 71 |
#description {
|
| 72 |
color: white;
|
| 73 |
font-size: 16px;
|
|
|
|
| 76 |
}
|
| 77 |
"""
|
| 78 |
|
| 79 |
+
# Image Preprocessing for the model (assuming the model was trained with resized and normalized images)
|
| 80 |
+
# Inference preprocessing: resize -> tensor -> ImageNet normalization.
# The normalization constants are the standard ImageNet channel statistics.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),  # model's expected input resolution
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
])
|
| 85 |
+
|
| 86 |
+
# Prediction function
|
| 87 |
+
# Prediction function
def predict_emotion(image):
    """Classify the emotion shown in a face image.

    Args:
        image: A PIL.Image or an RGB numpy array (gradio's default image type).

    Returns:
        Tuple of (label, confidence): the predicted emotion name and its
        softmax probability in [0, 1].
    """
    # BUG FIX: gradio hands the callback a numpy array by default, but
    # transforms.Resize needs a PIL Image — the conversion present in the
    # previous revision was dropped here. Accept both input types.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Preprocess and add the batch dimension the model expects.
    image_tensor = preprocess(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(image_tensor)
        _, predicted = torch.max(output, 1)  # index of the highest logit
        # Probability of the predicted class. Indexing [0, predicted] is
        # batch-safe, unlike a global .max() over the whole softmax tensor
        # (the two coincide only for batch size 1).
        probs = torch.nn.functional.softmax(output, dim=1)
        confidence = probs[0, predicted.item()].item()

    # Class index -> human-readable label; order must match training labels.
    emotions = ["Anger", "Disgust", "Fear", "Happiness", "Sadness", "Surprise", "Neutral"]
    predicted_emotion = emotions[predicted.item()]
    return predicted_emotion, confidence
|
| 101 |
+
|
| 102 |
# Gradio Interface
|
| 103 |
iface = gr.Interface(
|
| 104 |
fn=predict_emotion,
|