Spaces:
Sleeping
Sleeping
SakibRumu
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,13 +39,6 @@ class IR50(nn.Module):
|
|
| 39 |
self.layer2 = resnet.layer2
|
| 40 |
self.downsample = nn.Conv2d(512, 256, 1, stride=2)
|
| 41 |
self.bn_downsample = nn.BatchNorm2d(256, eps=1e-5)
|
| 42 |
-
# Fine-tuned layers (as in training)
|
| 43 |
-
for param in self.conv1.parameters():
|
| 44 |
-
param.requires_grad = True
|
| 45 |
-
for param in self.bn1.parameters():
|
| 46 |
-
param.requires_grad = True
|
| 47 |
-
for param in self.layer1.parameters():
|
| 48 |
-
param.requires_grad = True
|
| 49 |
|
| 50 |
def forward(self, x):
|
| 51 |
x = self.conv1(x)
|
|
@@ -89,7 +82,7 @@ class HLA(nn.Module):
|
|
| 89 |
|
| 90 |
# ViT Stream
|
| 91 |
class ViT(nn.Module):
|
| 92 |
-
def __init__(self, in_channels=256, patch_size=1, embed_dim=768, num_layers=
|
| 93 |
super(ViT, self).__init__()
|
| 94 |
self.patch_embed = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
|
| 95 |
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
|
|
@@ -196,10 +189,13 @@ class TripleStreamHLAViT(nn.Module):
|
|
| 196 |
|
| 197 |
# Load the model
|
| 198 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 199 |
model = TripleStreamHLAViT(num_classes=7).to(device)
|
| 200 |
model_path = "triple_stream_model_rafdb.pth" # Ensure this file is in the Hugging Face Space repository
|
| 201 |
try:
|
| 202 |
-
|
|
|
|
|
|
|
| 203 |
model.eval()
|
| 204 |
print("Model loaded successfully")
|
| 205 |
except Exception as e:
|
|
@@ -260,10 +256,9 @@ iface = gr.Interface(
|
|
| 260 |
gr.Image(label="Input Image and HLA Heatmap")
|
| 261 |
],
|
| 262 |
title="Facial Emotion Recognition with TripleStreamHLAViT",
|
| 263 |
-
description="Upload an image to predict the facial emotion (Surprise, Fear, Disgust, Happiness, Sadness, Anger, Neutral).",
|
| 264 |
examples=[
|
| 265 |
-
["examples/
|
| 266 |
-
["examples/happy.JPEG"],
|
| 267 |
["examples/sadness.jpg"]
|
| 268 |
]
|
| 269 |
)
|
|
|
|
| 39 |
self.layer2 = resnet.layer2
|
| 40 |
self.downsample = nn.Conv2d(512, 256, 1, stride=2)
|
| 41 |
self.bn_downsample = nn.BatchNorm2d(256, eps=1e-5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def forward(self, x):
|
| 44 |
x = self.conv1(x)
|
|
|
|
| 82 |
|
| 83 |
# ViT Stream
|
| 84 |
class ViT(nn.Module):
|
| 85 |
+
def __init__(self, in_channels=256, patch_size=1, embed_dim=768, num_layers=8, num_heads=12): # 8 layers as in the 82.93% version
|
| 86 |
super(ViT, self).__init__()
|
| 87 |
self.patch_embed = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
|
| 88 |
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
|
|
|
|
| 189 |
|
| 190 |
# Load the model
|
| 191 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 192 |
+
print(f"Using device: {device}")
|
| 193 |
model = TripleStreamHLAViT(num_classes=7).to(device)
|
| 194 |
model_path = "triple_stream_model_rafdb.pth" # Ensure this file is in the Hugging Face Space repository
|
| 195 |
try:
|
| 196 |
+
# Map the weights to the appropriate device
|
| 197 |
+
map_location = torch.device('cpu') if not torch.cuda.is_available() else None
|
| 198 |
+
model.load_state_dict(torch.load(model_path, map_location=map_location, weights_only=True))
|
| 199 |
model.eval()
|
| 200 |
print("Model loaded successfully")
|
| 201 |
except Exception as e:
|
|
|
|
| 256 |
gr.Image(label="Input Image and HLA Heatmap")
|
| 257 |
],
|
| 258 |
title="Facial Emotion Recognition with TripleStreamHLAViT",
|
| 259 |
+
description="Upload an image to predict the facial emotion (Surprise, Fear, Disgust, Happiness, Sadness, Anger, Neutral). This model achieves 82.93% test accuracy on the RAF-DB dataset. The HLA heatmap shows where the model focuses.",
|
| 260 |
examples=[
|
| 261 |
+
["examples/surprise.jpg"],
|
|
|
|
| 262 |
["examples/sadness.jpg"]
|
| 263 |
]
|
| 264 |
)
|