Spaces:
Sleeping
Sleeping
Adds a textbox for user to determine mask ratio; also includes center crop before resizing.
Browse files
app.py
CHANGED
|
@@ -11,6 +11,7 @@ pt_model_path = 'MAE1.bin'
|
|
| 11 |
ft_model_path='EmotionClassifier1.bin'
|
| 12 |
|
| 13 |
transform = transforms.Compose([
|
|
|
|
| 14 |
transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
|
| 15 |
transforms.ToTensor(),
|
| 16 |
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
|
|
@@ -64,10 +65,10 @@ yolo_mapping = {
|
|
| 64 |
8: "Surprised"
|
| 65 |
}
|
| 66 |
|
| 67 |
-
def mae_reconstruct(image:Image, figure_name='figure/demo_temp.png'):
|
| 68 |
img = transform(image).unsqueeze(0)
|
| 69 |
img = img.to(device)
|
| 70 |
-
|
| 71 |
with torch.no_grad():
|
| 72 |
x_enc, mask, ids_restore = mae_model.forward_encoder(img)
|
| 73 |
x_rec_patches = mae_model.forward_decoder(x_enc, ids_restore)
|
|
@@ -125,11 +126,11 @@ def classify(image:Image):
|
|
| 125 |
|
| 126 |
return predicted_labels
|
| 127 |
|
| 128 |
-
def predict(image:Image):
|
| 129 |
"""
|
| 130 |
takes PIL image and return reconstructed image and predicted emotion label
|
| 131 |
"""
|
| 132 |
-
|
| 133 |
masked_image, re_image = mae_reconstruct(image, figure_name='figure/demo_temp.png')
|
| 134 |
predicted_labels = classify(image)
|
| 135 |
|
|
@@ -145,7 +146,7 @@ demo = gr.Interface(
|
|
| 145 |
gr.Textbox(label='Predicted Emotion')
|
| 146 |
],
|
| 147 |
title="Emotion Recognition and MAE Reconstruction",
|
| 148 |
-
description="Upload an image to see the reconstructed image (by MAE) and the predicted emotion label."
|
| 149 |
)
|
| 150 |
|
| 151 |
demo.launch(debug=True)
|
|
|
|
| 11 |
ft_model_path='EmotionClassifier1.bin'
|
| 12 |
|
| 13 |
transform = transforms.Compose([
|
| 14 |
+
transforms.CenterCrop(1024),
|
| 15 |
transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
|
| 16 |
transforms.ToTensor(),
|
| 17 |
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
|
|
|
|
| 65 |
8: "Surprised"
|
| 66 |
}
|
| 67 |
|
| 68 |
+
def mae_reconstruct(image:Image, figure_name='figure/demo_temp.png', mask_ratio=0.75):
|
| 69 |
img = transform(image).unsqueeze(0)
|
| 70 |
img = img.to(device)
|
| 71 |
+
mae_model.mask_ratio = mask_ratio
|
| 72 |
with torch.no_grad():
|
| 73 |
x_enc, mask, ids_restore = mae_model.forward_encoder(img)
|
| 74 |
x_rec_patches = mae_model.forward_decoder(x_enc, ids_restore)
|
|
|
|
| 126 |
|
| 127 |
return predicted_labels
|
| 128 |
|
| 129 |
+
def predict(mask_ratio:float, image:Image):
|
| 130 |
"""
|
| 131 |
takes PIL image and return reconstructed image and predicted emotion label
|
| 132 |
"""
|
| 133 |
+
mask_ratio = float(mask_ratio)
|
| 134 |
masked_image, re_image = mae_reconstruct(image, figure_name='figure/demo_temp.png', mask_ratio=mask_ratio)
|
| 135 |
predicted_labels = classify(image)
|
| 136 |
|
|
|
|
| 146 |
gr.Textbox(label='Predicted Emotion')
|
| 147 |
],
|
| 148 |
title="Emotion Recognition and MAE Reconstruction",
|
| 149 |
+
description="Upload an image to see the reconstructed image (by MAE) and the predicted emotion label. Please only enter a decimal number greater than or equal to 0.00 and less than 1.00."
|
| 150 |
)
|
| 151 |
|
| 152 |
demo.launch(debug=True)
|