# Repo-page metadata (author: vrvundyala, commit message: "face expressions",
# commit: efae1c1) — kept as a comment so this file remains valid Python.
import numpy as np
import cv2
from matplotlib import pyplot as plt
import torch
# In the below line,remove '.' while working on your local system.However Make sure that '.' is present before face_recognition_model while uploading to the server, Do not remove it.
from .exp_recognition_model import *
from PIL import Image
import base64
import io
import os
import torch
import torch.nn as nn
from torchvision import models
import torchvision.transforms as transforms
import torch.nn.functional as F
## Add more imports if required
#############################################################################################################################
# Caution: Don't change any of the filenames, function names and definitions #
# Always use the current_path + file_name for refering any files, without it we cannot access files on the server #
#############################################################################################################################
# Current_path stores absolute path of the file from where it runs.
current_path = os.path.dirname(os.path.abspath(__file__))
# Index -> label mapping for the 7 expression classes; the keys must match
# the output-neuron order of the trained network's final layer.
classes = {0: 'ANGER', 1: 'DISGUST', 2: 'FEAR', 3: 'HAPPINESS', 4: 'NEUTRAL', 5: 'SADNESS', 6: 'SURPRISE'}
#1) The below function is used to detect faces in the given image.
#2) It returns only one image which has maximum area out of all the detected faces in the photo.
#3) If no face is detected,then it returns zero(0).
def detected_face(image):
    """Detect faces in a BGR image and return the largest one.

    Runs the Haar frontal-face cascade over the grayscale version of the
    input and crops every detected face from it.

    Args:
        image: BGR image as a numpy array (OpenCV convention).

    Returns:
        PIL.Image.Image: grayscale crop of the largest detected face, or
        the int ``0`` when no face is detected (callers test this sentinel).
    """
    eye_haar = current_path + '/haarcascade_eye.xml'
    face_haar = current_path + '/haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(face_haar)
    # Loaded for parity with the original code; not used in the detection below.
    eye_cascade = cv2.CascadeClassifier(eye_haar)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    if len(faces) == 0:
        # Documented sentinel: 0 means "no face found".
        return 0
    # Crop each face once; pick the one with the largest bounding-box area.
    crops = [gray[y:y + h, x:x + w] for (x, y, w, h) in faces]
    areas = [w * h for (_, _, w, h) in faces]
    # argmax hoisted after the loop — the original recomputed argmax and
    # Image.fromarray on every iteration (accidental O(n^2) work).
    return Image.fromarray(crops[int(np.argmax(areas))])
#1) Images captured from mobile is passed as parameter to the below function in the API call, It returns the Expression detected by your network.
#2) The image is passed to the function in base64 encoding, Code for decoding the image is provided within the function.
#3) Define an object to your network here in the function and load the weight from the trained network, set it in evaluation mode.
#4) Perform necessary transformations to the input(detected face using the above function), this should return the Expression in string form ex: "Anger"
#5) For loading your model use the current_path+'your model file name', anyhow detailed example is given in comments to the function
##Caution: Don't change the definition or function name; for loading the model use the current_path for path example is given in comments to the function
# def get_expression(img):
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#
# # Recreate the same model architecture
# num_classes = 7 # ๐Ÿ‘ˆ change this to match your training setup
#
# model = models.resnet18(weights=None)
# model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# # no pretrained weights now
# model.fc = nn.Sequential(
# nn.Linear(model.fc.in_features, 256),
# nn.ReLU(inplace=True),
# nn.Linear(256, num_classes)
# )
#
# model = model.to(device)
#
# # Create the optimizer (same as training)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
#
# # Load the checkpoint
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# ckpt_path = os.path.join(BASE_DIR, "expression_model.t7")
# checkpoint = torch.load(ckpt_path, map_location=device)
#
# # Restore weights and optimizer
# model.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
#
# # Put the model in evaluation mode
# model.eval()
#
# ##########################################################################################
# ##Example for loading a model using weight state dictionary: ##
# ## face_det_net = facExpRec() #Example Network ##
# ## model = torch.load(current_path + '/exp_recognition_net.t7', map_location=device) ##
# ## face_det_net.load_state_dict(model['net_dict']) ##
# ## ##
# ##current_path + '/<network_definition>' is path of the saved model if present in ##
# ##the same path as this file, we recommend to put in the same directory ##
# ##########################################################################################
# ##########################################################################################
#
# transform = transforms.Compose([
# transforms.Grayscale(num_output_channels=1),
# transforms.Resize(256),
# transforms.CenterCrop(224),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.5], std=[0.5])
# ])
#
# face = detected_face(img)
# if face==0:
# face = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
#
# face = transform(face).unsqueeze(0).to(device)
# # YOUR CODE HERE, return expression using your model
# with torch.no_grad():
# outputs = model(face)
# probs = F.softmax(outputs, dim=1)
# predicted_class = probs.argmax(dim=1).item()
# return predicted_class
def get_expression(img):
    """Classify the facial expression in a BGR image.

    Crops the largest face via ``detected_face`` (falling back to the full
    grayscale frame when no face is found), runs it through a grayscale
    ResNet-18 fine-tuned for 7 expression classes, and returns the label.

    Args:
        img: BGR image as a numpy array (OpenCV convention).

    Returns:
        str: one of the labels in the module-level ``classes`` dict,
        e.g. "HAPPINESS".
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Build the network once and cache it on the function object so that
    # repeated API calls do not reload the checkpoint from disk each time.
    model = getattr(get_expression, "_model", None)
    if model is None:
        num_classes = 7  # must match the training setup and `classes`
        # weights=None: every parameter is overwritten by load_state_dict
        # below, so initializing from ImageNet weights (as the original did)
        # is wasted work and an unnecessary network dependency at inference.
        model = models.resnet18(weights=None)
        # First conv adapted to single-channel (grayscale) input, exactly as
        # during training.
        model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Classification head identical to the training architecture.
        model.fc = nn.Sequential(
            nn.Linear(model.fc.in_features, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )
        # Use the module-level current_path (same value the original
        # recomputed via os.path.dirname(os.path.abspath(__file__))).
        ckpt_path = os.path.join(current_path, "expression_model.t7")
        checkpoint = torch.load(ckpt_path, map_location=device)
        # Inference-only: the optimizer state in the checkpoint is not needed.
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device)
        model.eval()  # disable dropout, use running batch-norm statistics
        get_expression._model = model

    # Same preprocessing as training: grayscale -> resize 256 -> center-crop 224.
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    face = detected_face(img)
    if isinstance(face, int):
        # Sentinel 0 from detected_face: no face found -> use the whole frame.
        face = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

    batch = transform(face).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(batch)
        # softmax is monotonic, so argmax on raw logits gives the same class
        # as the original softmax-then-argmax.
        predicted = logits.argmax(dim=1).item()
    return classes[predicted]