Spaces:
Runtime error
Runtime error
Gradio app
Browse files- .gitignore +4 -0
- README.md +14 -3
- actors_matching/__init__.py +0 -0
- actors_matching/api.py +38 -0
- app.py +58 -0
- images/example_hannibal_barca.jpg +0 -0
- images/example_joan_of_arc.jpg +0 -0
- images/example_marie_curie.jpg +0 -0
- images/example_scipio_africanus.jpg +0 -0
- models/actors_annoy_index.ann +0 -0
- models/actors_annoy_metadata.json +1 -0
- models/actors_mapping.json +0 -0
- requirements.txt +3 -1
.gitignore
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# data files from imdb
|
| 2 |
data/title.*.tsv*
|
| 3 |
data/name.*.tsv*
|
| 4 |
|
| 5 |
# Byte-compiled / optimized / DLL files
|
| 6 |
__pycache__/
|
|
|
|
| 7 |
*.py[cod]
|
| 8 |
*$py.class
|
| 9 |
|
|
|
|
| 1 |
+
# IDE
|
| 2 |
+
.vscode
|
| 3 |
+
|
| 4 |
# data files from imdb
|
| 5 |
data/title.*.tsv*
|
| 6 |
data/name.*.tsv*
|
| 7 |
|
| 8 |
# Byte-compiled / optimized / DLL files
|
| 9 |
__pycache__/
|
| 10 |
+
*/__pycache__/
|
| 11 |
*.py[cod]
|
| 12 |
*$py.class
|
| 13 |
|
README.md
CHANGED
|
@@ -19,7 +19,18 @@ Note that due to API limits, I only took images from 1,000 actors.
|
|
| 19 |
|
| 20 |
The application is built with Gradio and deployed on HuggingFace Space. In the background, it uses:
|
| 21 |
|
| 22 |
-
1. The [`face_recognition` library](https://github.com/ageitgey/face_recognition) to compute an embedding of
|
| 23 |
-
2. Spotify's `annoy` library to efficiently search the closest actors based on the
|
| 24 |
-
3. Show you
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
|
|
|
| 19 |
|
| 20 |
The application is built with Gradio and deployed on HuggingFace Space. In the background, it uses:
|
| 21 |
|
| 22 |
+
1. The [`face_recognition` library](https://github.com/ageitgey/face_recognition) to extract the location of faces in the image and compute an embedding of these faces
|
| 23 |
+
2. Spotify's `annoy` library to efficiently search the closest actors based on the face embedding and a small database of actors' faces embeddings.
|
| 24 |
+
3. Show you the best matches!
|
| 25 |
+
|
| 26 |
+
This is meant to be a fun and tiny application. There are known issues and biases.
|
| 27 |
+
|
| 28 |
+
## Known biases and limitations
|
| 29 |
+
|
| 30 |
+
There are a few issues with the dataset and models used:
|
| 31 |
+
|
| 32 |
+
- The dataset of actors is limited to a couple thousand actors and actresses and is therefore not representative of the richness of professionals out there
|
| 33 |
+
- The subset of actors and actresses selected is based on an aggregated metric that considers all movies and shows in which the person was listed as an actor/actress. It is the weighted sum of the number of IMDb votes for each movie/show, weighted by the average IMDb score. This is obviously only a rough indicator of popularity, but it provided me with a quick way of getting a dataset with actors that people may know.
|
| 34 |
+
- Given the above, the database sampling will have several biases that are intrinsic to (a) the IMDb database and user base itself which is biased towards western/American movies, (b) the movie industry itself with a dominance of white male actors
|
| 35 |
+
- The pictures of actors and actresses were collected through a simple Bing Search and not manually verified, so there are several mistakes. For example, Graham Greene has a mix of pictures of Graham Greene, the Canadian actor, and Graham Greene, the writer. You may get surprising results from time to time! Let me know if you find mistakes.
|
| 36 |
|
actors_matching/__init__.py
ADDED
|
File without changes
|
actors_matching/api.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import face_recognition
|
| 2 |
+
import json
|
| 3 |
+
import annoy
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
|
| 6 |
+
EMBEDDING_DIMENSION=128
|
| 7 |
+
ANNOY_INDEX_FILE = "models/actors_annoy_index.ann"
|
| 8 |
+
ANNOY_METADATA_FILE = "models/actors_annoy_metadata.json"
|
| 9 |
+
ANNOY_MAPPING_FILE = "models/actors_mapping.json"
|
| 10 |
+
|
| 11 |
+
def load_annoy_index(
    index_file=ANNOY_INDEX_FILE,
    metadata_file=ANNOY_METADATA_FILE,
    mapping_file=ANNOY_MAPPING_FILE,
) -> Tuple[annoy.AnnoyIndex, dict]:
    """Load the annoy index and the item-id -> actor mapping.

    The metadata JSON provides the keyword arguments (e.g. the distance
    metric) the index was built with; the mapping JSON maps annoy item
    ids (stored as strings) to actor records, so keys are cast to int.

    Returns:
        A tuple (annoy_index, mapping) where mapping is keyed by int ids.
    """
    with open(metadata_file) as meta_fh:
        metadata = json.load(meta_fh)

    # The index must be constructed with the same settings it was built with.
    index = annoy.AnnoyIndex(f=EMBEDDING_DIMENSION, **metadata)
    index.load(index_file)

    with open(mapping_file) as map_fh:
        raw_mapping = json.load(map_fh)
    # JSON object keys are strings; annoy returns int ids, so convert.
    mapping = {int(item_id): actor for item_id, actor in raw_mapping.items()}
    return index, mapping
|
| 27 |
+
|
| 28 |
+
def analyze_image(image, annoy_index, n_matches: int = 1, num_jitters: int = 1, model: str = "large"):
    """Locate faces in the image, embed them, and find the closest actors.

    Args:
        image: image array accepted by face_recognition.
        annoy_index: a loaded annoy index of actor face embeddings.
        n_matches: number of nearest neighbours to return per face.
        num_jitters: passed through to face_recognition.face_encodings.
        model: embedding model name passed to face_recognition.

    Returns:
        One dict per detected face with keys "embeddings", "matches",
        "distances", and "face_locations".
    """
    locations = face_recognition.face_locations(image)
    encodings = face_recognition.face_encodings(
        image,
        num_jitters=num_jitters,
        model=model,
        known_face_locations=locations,
    )

    # One result record per face, pairing each embedding with its location.
    results = []
    for encoding, location in zip(encodings, locations):
        neighbor_ids, neighbor_distances = annoy_index.get_nns_by_vector(
            encoding, n_matches, include_distances=True
        )
        results.append(
            dict(
                embeddings=encoding,
                matches=neighbor_ids,
                distances=neighbor_distances,
                face_locations=location,
            )
        )
    return results
|
app.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
from actors_matching.api import analyze_image, load_annoy_index
|
| 4 |
+
|
| 5 |
+
annoy_index, actors_mapping = load_annoy_index()
|
| 6 |
+
|
| 7 |
+
def get_image_html(actor: dict):
    """Build an HTML card showing an actor's picture, name, and IMDb link.

    Args:
        actor: mapping with keys "url" (image URL), "name" (display name),
            and "nconst" (IMDb name identifier, e.g. "nm0000123").

    Returns:
        An HTML snippet (str) suitable for one carousel entry.
    """
    url = actor["url"]
    name = actor["name"]
    imdb_url = f"https://www.imdb.com/name/{actor['nconst']}/"
    # Fixed: the href attribute value is now quoted and the anchor is
    # closed with a valid </a> tag (was an invalid `</>`).
    return f'''
    <div style="position: relative; text-align: center; color: white;">
        <img src="{url}" alt="{name} matches the input image" style="height: 500px">
        <div style="padding: 0.2em; position: absolute; bottom: 16px; left: 16px; background-color: #aacccccc; font-size: 2em;">
            <p>{name}</p>
            <p style="font-size:0.5em"><a href="{imdb_url}" target="_blank">Click to see on IMDb</a></p>
        </div>
    </div>
    '''
|
| 20 |
+
|
| 21 |
+
def get_best_matches(image, n_matches: int):
    """Run the face-matching pipeline against the module-level annoy index."""
    results = analyze_image(image, annoy_index=annoy_index, n_matches=n_matches)
    return results
|
| 23 |
+
|
| 24 |
+
def find_matching_actors(input_img, title, n_matches: int = 10):
    """Gradio handler: return HTML cards for the actors matching the image.

    Args:
        input_img: image array from the Gradio image input.
        title: optional caption typed by the user (currently unused; kept
            because the UI declares a Textbox input).
        n_matches: number of matching actors to retrieve.

    Returns:
        A list of HTML snippets, one per matching actor, for the carousel.
    """
    best_matches_list = get_best_matches(input_img, n_matches=n_matches)

    # Guard against images with no detectable face: indexing [0] on an
    # empty result previously raised IndexError. Show a message instead.
    if not best_matches_list:
        return ["<p>No face could be detected in this image, please try another one.</p>"]

    best_matches = best_matches_list[0]  # TODO: allow looping through characters

    # Build one HTML card per matching actor.
    output_htmls = []
    for match in best_matches["matches"]:
        actor = actors_mapping[match]
        output_htmls.append(get_image_html(actor))

    return output_htmls
|
| 37 |
+
|
| 38 |
+
# Gradio UI wiring: image + optional caption in, carousel of actor cards out.
_DESCRIPTION = """Who is the best person to play a movie about you? Upload a picture and find out!
Or maybe you'd like to know who would best interpret your favorite historical character?
Give it a shot or try one of the sample images below."""

# Sample images of historical figures shipped with the app.
_EXAMPLES = [
    ["images/example_marie_curie.jpg", "Marie Curie"],
    ["images/example_hannibal_barca.jpg", "Hannibal (the one with the elephants...)"],
    ["images/example_scipio_africanus.jpg", "Scipio Africanus"],
    ["images/example_joan_of_arc.jpg", "Jeanne d'Arc"],
]

iface = gr.Interface(
    find_matching_actors,
    title="Which actor or actress looks like you?",
    description=_DESCRIPTION,
    inputs=[
        gr.inputs.Image(shape=(256, 256), label="Your image"),
        gr.inputs.Textbox(label="Who's that?", placeholder="Optional, you can leave this blank"),
        # gr.inputs.Slider(minimum=1, maximum=10, step=1, default=5, label="Number of matches"),
    ],
    outputs=gr.outputs.Carousel(gr.outputs.HTML(), label="Matching actors & actresses"),
    examples=_EXAMPLES,
)

iface.launch()
|
images/example_hannibal_barca.jpg
ADDED
|
images/example_joan_of_arc.jpg
ADDED
|
images/example_marie_curie.jpg
ADDED
|
images/example_scipio_africanus.jpg
ADDED
|
models/actors_annoy_index.ann
ADDED
|
Binary file (1.52 MB). View file
|
|
|
models/actors_annoy_metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"metric": "angular"}
|
models/actors_mapping.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
-
#
|
| 2 |
cmake # required for dlib (used by face_recognition)
|
| 3 |
face_recognition
|
| 4 |
annoy
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Preprocessing
|
| 7 |
microsoft-bing-imagesearch
|
|
|
|
| 1 |
+
# App
|
| 2 |
cmake # required for dlib (used by face_recognition)
|
| 3 |
face_recognition
|
| 4 |
annoy
|
| 5 |
+
matplotlib
|
| 6 |
+
gradio
|
| 7 |
|
| 8 |
# Preprocessing
|
| 9 |
microsoft-bing-imagesearch
|