| import os |
| import gradio as gr |
| import numpy as np |
| import cv2 |
| from PIL import Image, ImageOps |
| from insightface.app import FaceAnalysis |
| from hsemotion_onnx.facial_emotions import HSEmotionRecognizer |
|
|
|
|
# EXIF orientation value -> (angle handed to ``Image.rotate``, mirror afterwards).
# Orientation 1 (already upright) and unknown values are deliberately absent,
# so they fall through to "leave the image untouched".
_EXIF_ORIENTATION_OPS = {
    2: (0, True),
    3: (180, False),
    4: (180, True),
    5: (-90, True),
    6: (-90, False),
    7: (90, True),
    8: (90, False),
}


def exif_transpose(img):
    """Return ``img`` oriented according to its EXIF orientation tag.

    Delegates to ``ImageOps.exif_transpose`` when the installed Pillow
    provides it. Otherwise the orientation tag (274) is read through the
    image's ``_getexif`` method and the matching rotation / horizontal
    mirror is applied by hand.

    Args:
        img: Image object to normalise (a PIL image in this file).

    Returns:
        The re-oriented image, or ``img`` itself when the fallback path finds
        no EXIF dictionary, no orientation tag, or an orientation that needs
        no change.
    """
    if hasattr(ImageOps, 'exif_transpose'):
        return ImageOps.exif_transpose(img)

    exif_orientation_tag = 274

    # Read the EXIF data a single time; the previous code called _getexif()
    # up to three times for the same image.
    read_exif = getattr(img, "_getexif", None)
    exif_data = read_exif() if read_exif is not None else None
    if not isinstance(exif_data, dict):
        return img

    orientation = exif_data.get(exif_orientation_tag)
    angle, mirror = _EXIF_ORIENTATION_OPS.get(orientation, (0, False))

    if angle:
        # Quarter turns swap width and height, so the canvas must expand;
        # a half turn keeps the original canvas size.
        img = img.rotate(angle, expand=(angle != 180))
    if mirror:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)

    return img
|
|
|
|
def resize(image, target_size):
    """Scale ``image`` to fit inside ``target_size`` keeping its aspect ratio.

    The same scaling factor is applied to both dimensions, so images smaller
    than ``target_size`` are enlarged and larger ones are shrunk.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize`` method (a PIL image in this file).
        target_size: ``(max_width, max_height)`` bounding box in pixels.

    Returns:
        The resized image, resampled with ``Image.NEAREST``.
    """
    width, height = image.size

    # Use the tighter of the two ratios so neither side exceeds the box.
    scaling_factor = min(target_size[0] / width, target_size[1] / height)

    # int() truncates, so a very elongated image could end up with a
    # zero-pixel side, which the resize call rejects. Keep at least 1 px.
    target_width = max(1, int(scaling_factor * width))
    target_height = max(1, int(scaling_factor * height))

    return image.resize((target_width, target_height), resample=Image.NEAREST)
|
|
|
|
def facial_emotion_recognition(img):
    """Annotate the most confidently detected face with its predicted emotion.

    The input is EXIF-normalised, resized to fit ``target_size`` and converted
    to an array. The face with the highest ``det_score`` is padded by
    ``face_margin`` (clipped to the image bounds), passed to
    ``hse_emo_model.predict_emotions``, and outlined and labelled on the image.

    Args:
        img: Input image (a PIL image in this file), or ``None`` when no
            image was supplied.

    Returns:
        The resized image as an array, with a rectangle and emotion label
        drawn on the best face; the resized array unchanged when no usable
        face is found; ``None`` when ``img`` is ``None``.
    """
    # Nothing to process (e.g. the form was submitted without an image).
    if img is None:
        return None

    # np.array makes a private, writable copy. The cv2 drawing calls below
    # write into the array in place, and np.asarray on a PIL image can hand
    # back a read-only array that OpenCV refuses as an output argument.
    img = np.array(resize(exif_transpose(img), target_size))

    faces = face_detector.get(img)
    if len(faces) == 0:
        return img

    # Only the most confident detection is analysed.
    best_face = max(faces, key=lambda face: face['det_score'])

    # tolist() yields plain Python ints, which the cv2 point tuples expect.
    x1, y1, x2, y2 = best_face['bbox'].astype(int).tolist()

    # Pad the box by a fraction of its size, clipped to the image bounds.
    x_margin = int((x2 - x1) * face_margin)
    y_margin = int((y2 - y1) * face_margin)
    x = max(0, x1 - x_margin)
    y = max(0, y1 - y_margin)
    w = min(x2 + x_margin, img.shape[1]) - x
    h = min(y2 + y_margin, img.shape[0]) - y

    # A degenerate box would give an empty crop; return the image unannotated.
    if w <= 0 or h <= 0:
        return img

    emotion, _scores = hse_emo_model.predict_emotions(img[y:y+h, x:x+w], logits=True)

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 2
    colour = (0, 0, 255)

    cv2.rectangle(img, (x, y), (x+w, y+h), colour, thickness)

    # Put the label just above the box, but never let its baseline rise so
    # high that the text is cut off when the face touches the top edge.
    (_text_width, text_height), _baseline = cv2.getTextSize(emotion, font, font_scale, thickness)
    label_y = max(y - 10, text_height + thickness)
    cv2.putText(img, emotion, (x, label_y), font, font_scale, colour, thickness, cv2.LINE_AA)

    return img
|
|
# --- Settings read by facial_emotion_recognition --------------------------
# Fraction of the detected box's width/height added as padding on each side.
face_margin = 0.1
# Bounding box (width, height) the input image is resized to fit in.
target_size = (640, 640)

# --- Face detector --------------------------------------------------------
# The 'buffalo_sc' model directory is resolved relative to this file.
model_name = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'buffalo_sc',
)
face_detector = FaceAnalysis(
    name=model_name,
    allowed_modules=['detection'],
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)
face_detector.prepare(ctx_id=0, det_size=(640, 640))

# --- Emotion classifier ---------------------------------------------------
hse_emo_model = HSEmotionRecognizer(model_name='enet_b0_8_best_vgaf')

# --- Webcam tab -----------------------------------------------------------
webcam = gr.Image(type='pil', source='webcam', label='Input Image')
webcam_output = gr.Image(image_mode='RGB', type='numpy', label='Output Image')
webcam_interface = gr.Interface(
    facial_emotion_recognition,
    inputs=webcam,
    outputs=webcam_output,
)

# --- Upload tab (with the bundled 'examples' entries) ---------------------
upload = gr.Image(type='pil', source='upload', label='Input Image')
upload_output = gr.Image(image_mode='RGB', type='numpy', label='Output Image')
upload_interface = gr.Interface(
    facial_emotion_recognition,
    inputs=upload,
    outputs=upload_output,
    examples='examples',
)

# --- Combine both tabs and start the app ----------------------------------
demo = gr.TabbedInterface(
    interface_list=[upload_interface, webcam_interface],
    tab_names=['Upload', 'Webcam'],
)
demo.launch()
|
|