Spaces:
Sleeping
Sleeping
| import PIL.Image | |
| from PIL import ImageDraw, ImageFont | |
| import gradio as gr | |
| import numpy as np | |
| from celeb_indicies import * | |
| import PIL | |
| import os | |
| import platform | |
| import time | |
| from huggingface_hub import hf_hub_download | |
| import huggingface_hub | |
| from diffusers import DiffusionPipeline | |
| import torch | |
| from torch import nn | |
| import torchvision | |
# By default, dlib will be compiled locally when installed via pip, which takes so long it
# Causes huggingface to time out during the build process.
# To avoid this, check if we are running on a Linux system, and if so load a binary of dlib compiled for x86_64 Linux
# NOTE(review): this shells out to pip at import time — the prebuilt wheel must
# ship alongside this file, and `face_detection` must only be imported AFTER
# these installs (it transitively needs dlib/face_recognition).
if(platform.system() == 'Linux'):
    os.system("pip install ./dlib-19.24.99-cp310-cp310-linux_x86_64.whl")
    os.system("pip install face-recognition")

# Time the import of the project face-detection helper, since loading
# dlib/face_recognition is the slow part of startup.
start_time = time.time()
import face_detection
print(f"took {(time.time() - start_time) / 60} minutes to load face_recognition")
# Change these values to switch which Hugging Face Hub model is used and which
# weights file inside that repo is downloaded.
model_repo_id = "CSSE416-final-project/faceRecogModel"  # Hub repository id
weight_file_id = "modelWeights101.bin"  # state_dict file inside the repo
# 1. Load the model from Hugging Face Hub
def load_model(repo_id, filename=None, num_classes=100):
    """Download classifier weights from the Hub and return a ResNet-18 in eval mode.

    Args:
        repo_id: Hugging Face Hub repository id to download the weights from.
        filename: Name of the weights file inside the repo. Defaults to the
            module-level ``weight_file_id``.
        num_classes: Output size of the final fully-connected layer
            (100 celebrity classes for this project).

    Returns:
        A ``torchvision`` ResNet-18 with its ``fc`` head resized to
        ``num_classes``, weights loaded onto the CPU, in evaluation mode.
    """
    if filename is None:
        filename = weight_file_id

    # Bug fix: the original ignored the ``repo_id`` parameter and always read
    # the global ``model_repo_id``, so passing a different repo had no effect.
    weights_path = hf_hub_download(repo_id=repo_id, filename=filename)

    # Bare architecture (no pretrained ImageNet weights) with the
    # classification head resized to our label set.
    model = torchvision.models.resnet18(pretrained=False)
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # Weights were saved as a state_dict; map to CPU so loading also works on
    # machines without a GPU (e.g. the Spaces runtime).
    state_dict = torch.load(weights_path, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)
    model.eval()  # inference only — freezes dropout / batch-norm behavior
    return model
# 2. Instantiate the classifier once at import time (downloads the weights
# from the Hub on first run; cached by huggingface_hub afterwards).
model = load_model(model_repo_id)
# 3. Preprocessing for each cropped face: resize to the 224x224 input that
# ResNet-18 expects, then convert the PIL image to a float tensor in [0, 1].
# NOTE(review): no mean/std normalization is applied — presumably training
# used raw [0, 1] tensors as well; confirm against the training pipeline.
transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor()
    ])
# 4. Preprocess and display the image
def process_image_str(groupImageFilePath: str):
    """Detect faces in the uploaded image, classify each one, and return
    an annotated image plus a markdown legend.

    Each detected face gets a numbered green bounding box drawn on the
    original image; the legend table maps each number to the predicted
    celebrity name and the softmax confidence.

    Args:
        groupImageFilePath: Filesystem path to the uploaded image
            (supplied by the Gradio ``filepath`` input).

    Returns:
        ``[gr.Image, gr.Markdown]`` — the annotated image and the legend.
    """
    groupImage = PIL.Image.open(groupImageFilePath)
    # Project helper: parallel lists of bounding boxes and cropped face images.
    locations, images = face_detection.getCroppedImages(groupImage)

    # Idiom fix: use the ImageDraw.Draw factory rather than instantiating the
    # internal ImageDraw class directly.
    draw = ImageDraw.Draw(groupImage)
    try:
        font = ImageFont.truetype("Arial Bold.ttf", 30)
    except OSError:
        # Robustness: fall back to PIL's built-in font instead of crashing
        # when the bundled TTF is missing (e.g. outside the Space container).
        font = ImageFont.load_default()

    labels = "| | Name | Certainty | \n | -------- | ------- | ------- |\n"
    # enumerate replaces the original manual `n = 1 / n += 1` counter.
    for n, (image, location) in enumerate(zip(images, locations), start=1):
        # Preprocess the face crop and add a batch dimension. (Also fixes the
        # original `intputTensor` typo.)
        input_tensor = transforms(image).unsqueeze(0)
        # Classify without tracking gradients; softmax turns logits into a
        # confidence for the winning class.
        with torch.no_grad():
            outputs_t = model(input_tensor)
            cert, pred_t = torch.max(torch.softmax(outputs_t, dim=1), dim=1)

        # Annotate the original image with a numbered green box.
        draw.rectangle(location, outline=(0, 255, 0), width=2)
        draw.text((location[0] + 4, location[1] + 2), f"{n}", fill=(0, 255, 0), font=font)
        labels += f"| {n} | {celeb_list[pred_t.item()]} | {int(cert.item() * 100)}% | \n"

    return [gr.Image(groupImage), gr.Markdown(labels)]
# 5. Create the Gradio interface
interface = gr.Interface(
    fn=process_image_str,  # maps the uploaded file path to (annotated image, legend)
    inputs=gr.Image(type='filepath', label="Input Image"),  # hands the fn a temp file path, not an array
    outputs=[gr.Image(label="Output"), gr.Markdown(label="Output Legend")],  # matches fn's 2-item return
    allow_flagging='never',  # NOTE(review): deprecated in Gradio 4+ (renamed flagging_mode) — confirm installed version
    title="Celebrity Face Detector",
    description="Upload a picture of a celebrity or group of celebrities to identify them"
)

# 6. Launch the app only when run as a script (not when imported as a module).
if __name__ == "__main__":
    interface.launch()