# Author: Winston de Jong — "Add list of celebrities and indices" (commit c8d4b10)
import PIL.Image
import gradio as gr
import numpy as np
from celeb_indicies import *
import PIL
import os
import platform
import time
from huggingface_hub import hf_hub_download
import huggingface_hub
# import spaces #[uncomment to use ZeroGPU]
from diffusers import DiffusionPipeline
import torch
from torch import nn
import torchvision
# dlib compiles from source when installed via pip, which is slow enough to
# make the Hugging Face build time out. On Linux, side-step that by installing
# a prebuilt x86_64 wheel first, then face-recognition on top of it.
if platform.system() == 'Linux':
    os.system("pip install ./dlib-19.24.99-cp310-cp310-linux_x86_64.whl")
    os.system("pip install face-recognition")

# Importing face_detection pulls in face_recognition/dlib, which can be slow;
# time it so build logs show where startup cost goes.
start_time = time.time()
import face_detection
print(f"took {(time.time() - start_time) / 60} minutes to load face_recognition")
# # Function to display the uploaded image
# def process_image(image : PIL.Image.Image):
# outputs = face_detection.getCroppedImages(image)
# # do AI stuff here
# return gr.Image(outputs[0])
# Hugging Face Hub repo that hosts the trained face-recognition classifier.
model_repo_id = "CSSE416-final-project/faceRecogModel"
# Name of the serialized state_dict file inside that repo
# ("100" presumably refers to the number of output classes — see load_model).
weight_file_id = "modelWeights100.bin"
# 1. Load the model from Hugging Face Hub
def load_model(repo_id, filename=weight_file_id):
    """Download and build the 100-class celebrity classifier.

    Args:
        repo_id: Hugging Face Hub repo containing the trained weights.
        filename: name of the state-dict file inside the repo
            (defaults to the module-level ``weight_file_id``).

    Returns:
        A ResNet-18 ``torch.nn.Module`` in eval mode with the downloaded
        weights loaded.
    """
    # Download (or reuse the locally cached copy of) the weight file.
    # Bug fix: use the repo_id argument — the original ignored it and read
    # the global model_repo_id instead.
    weights_path = hf_hub_download(repo_id=repo_id, filename=filename)
    # weights=None: no point downloading ImageNet weights here — every
    # parameter is overwritten by load_state_dict below. (The old
    # pretrained= keyword is deprecated in current torchvision.)
    model = torchvision.models.resnet18(weights=None)
    # Replace the 1000-class ImageNet head with a 100-class head; 100 matches
    # the "modelWeights100" checkpoint (presumably len(celeb_list) — the
    # original TODO asked to confirm this).
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 100)
    # weights_only=True: refuse arbitrary pickled code in the checkpoint.
    state_dict = torch.load(weights_path, map_location=torch.device("cpu"),
                            weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()  # inference mode: freezes dropout / batch-norm statistics
    return model
# 2. Load the model once at import time so every request reuses the same
# in-memory instance (avoids re-downloading/re-building per request).
model = load_model(model_repo_id)
# 3. Define how to transform image
transforms = torchvision.transforms.Compose(
[
torchvision.transforms.ToTensor()
])
# 4. Load the image, detect faces, and classify each one
def process_image_str(groupImageFilePath: str) -> str:
    """Detect the faces in the image at *groupImageFilePath* and name each one.

    Returns a comma-separated string with one predicted celebrity name per
    detected face, or a friendly message when no face is found.
    """
    groupImage = PIL.Image.open(groupImageFilePath)
    # locations are the face bounding boxes; images are the cropped face crops
    # — presumably parallel lists; verify against face_detection.
    locations, images = face_detection.getCroppedImages(groupImage)
    outputLabels = []
    for image in images:
        # ToTensor + unsqueeze(0): add a leading batch dimension of 1.
        inputTensor = transforms(image).unsqueeze(0)
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            outputs_t = model(inputTensor)
        # argmax over the class dimension yields the predicted celebrity index.
        _, pred_t = torch.max(outputs_t, dim=1)
        outputLabels.append(celeb_list[pred_t.item()])
    if not outputLabels:
        # Bug fix: the original did outputLabels.pop(0), which raised
        # IndexError when no face was detected.
        return "No faces detected"
    # Bug fix: report every detected face, not just the first — the UI
    # promises to identify a "group of celebrities".
    return ", ".join(outputLabels)
# 5. Create the Gradio interface
interface = gr.Interface(
    fn=process_image_str, # Function invoked per upload
    inputs=gr.Image(type='filepath'), # Upload widget; fn receives a temp file path
    # outputs=gr.Image(), # Display output
    outputs='text', # fn returns a plain string
    allow_flagging='never', # NOTE(review): deprecated in Gradio 4+; flagging_mode='never' is the replacement — confirm installed version
    title="Celebrity Face Detector",
    description="Upload a picture of a celebrity or group of celebrities to identify them"
)
# 6. Launch the app
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.
    interface.launch()