Spaces:
Running
Running
File size: 3,842 Bytes
62a013b e0e6eff 62a013b 9b06224 62a013b d8aa102 62a013b 86e1f1d 62a013b 86e1f1d 62a013b 395ed84 62a013b 86e1f1d 62a013b 395ed84 62a013b 86e1f1d 62a013b 86e1f1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import io
import base64
import numpy as np
from uuid import uuid4
from PIL import Image as PILImage
from models.face_recognition import EnsembleFaceRecognition, extract_faces, extract_faces_mediapipe
from utils.vtt_parser import parse_vtt_offsets
def get_face_predictions(face, ensemble, data_manager, results):
    """
    Predict performer identities for a single detected face.

    Parameters:
        face: Face image array (H x W x C)
        ensemble: EnsembleFaceRecognition instance
        data_manager: DataManager instance
        results: Number of results to return
    Returns:
        List of (name, confidence) tuples
    """
    # Run the original face and its horizontal mirror through the
    # embedding models in a single batched call.
    mirrored = face[:, ::-1, :]
    batch = np.stack((face, mirrored), axis=0)
    embeddings = ensemble.get_face_embeddings_batch(batch)
    # Averaging over the two orientations smooths out pose asymmetry.
    mean_facenet = np.mean(embeddings['facenet'], axis=0)
    mean_arc = np.mean(embeddings['arc'], axis=0)
    # Query at least 50 neighbours so the ensemble has enough candidates
    # to rank, regardless of how few results the caller asked for.
    k = max(results, 50)
    candidates = {
        'facenet': data_manager.query_facenet_index(mean_facenet, k),
        'arc': data_manager.query_arc_index(mean_arc, k),
    }
    return ensemble.ensemble_prediction(candidates)
def image_search_performers(image, data_manager, threshold=0.5, results=3):
    """
    Search for multiple performers in an image.

    Parameters:
        image: PIL Image object
        data_manager: DataManager instance
        threshold: Confidence threshold (currently unused in this body —
            NOTE(review): confirm whether filtering was intended here)
        results: Number of results to return per face
    Returns:
        List of dictionaries with the base64 face crop, facial area,
        detection confidence, and matched performer information
    Raises:
        ValueError: if no faces are detected in the image
    """
    image_array = np.array(image)
    ensemble = EnsembleFaceRecognition({"facenet": 1.0, "arc": 1.0})
    try:
        faces = extract_faces(image_array)
    except ValueError as err:
        # Re-raise with a user-facing message while preserving the
        # original exception as the cause (was previously dropped).
        raise ValueError("No faces found") from err
    response = []
    for face in faces:
        predictions = get_face_predictions(face['face'], ensemble, data_manager, results)
        area = face['facial_area']
        # Crop the detected face from the original image and JPEG-encode
        # it as base64 so it can travel in a JSON response.
        cropped = image.crop((area['x'], area['y'],
                              area['x'] + area['w'], area['y'] + area['h']))
        buf = io.BytesIO()
        cropped.save(buf, format='JPEG')
        im_b64 = base64.b64encode(buf.getvalue()).decode('ascii')
        # Resolve each predicted name to full performer info; skip
        # entries the data manager cannot resolve.
        performers = []
        for name, confidence in predictions:
            performer_info = data_manager.get_performer_info(data_manager.faces[name], confidence)
            if performer_info:
                performers.append(performer_info)
        response.append({
            'image': im_b64,
            'area': area,
            'confidence': face['confidence'],
            'performers': performers
        })
    return response
def find_faces_in_sprite(image, vtt_file):
    """
    Find faces in a sprite image using VTT tile offsets.

    Parameters:
        image: Sprite frame as a numpy array (it is passed to
            PIL.Image.fromarray, so it must be an ndarray, not a PIL
            Image as previously documented)
        vtt_file: File-like object whose .name points at the VTT data
    Returns:
        List of dictionaries (id, offset, frame, time, size) — one per
        sprite tile containing a confident face detection
    """
    # Read the VTT text and hand it to the parser as UTF-8 bytes.
    with open(vtt_file.name, 'r', encoding='utf-8') as f:
        vtt = f.read().encode('utf-8')
    sprite = PILImage.fromarray(image)
    results = []
    for i, (left, top, width, height, time_seconds) in enumerate(parse_vtt_offsets(vtt)):
        # VTT offsets are (left, top, width, height); Image.crop takes a
        # (left, upper, right, lower) box, hence the additions.
        cut_frame = sprite.crop((left, top, left + width, top + height))
        faces = extract_faces_mediapipe(np.asarray(cut_frame), enforce_detection=False, align=False)
        # Keep only confident detections.
        faces = [face for face in faces if face['confidence'] > 0.6]
        if faces:
            # Use the first detection's bounding-box area as the size.
            size = faces[0]['facial_area']['w'] * faces[0]['facial_area']['h']
            results.append({
                'id': str(uuid4()),
                'offset': (left, top, width, height),
                'frame': i,
                'time': time_seconds,
                'size': size,
            })
    # Bug fix: the original fell off the end without returning the
    # collected results (and carried a stray trailing token after the
    # final append that would not parse).
    return results