Spaces:
Running
Running
| import io | |
| import base64 | |
| import numpy as np | |
| from uuid import uuid4 | |
| from PIL import Image as PILImage | |
| from models.face_recognition import EnsembleFaceRecognition, extract_faces, extract_faces_mediapipe | |
| from utils.vtt_parser import parse_vtt_offsets | |
def get_face_predictions(face, ensemble, data_manager, results):
    """
    Predict identities for a single detected face.

    Parameters:
        face: Face image array (H x W x C)
        ensemble: EnsembleFaceRecognition instance
        data_manager: DataManager instance
        results: Number of results to return
    Returns:
        List of (name, confidence) tuples
    """
    # Test-time augmentation: embed the face and its horizontal mirror
    # in a single batched call, then average the two orientations.
    mirrored = face[:, ::-1, :]
    batch = np.stack((face, mirrored))
    embeddings = ensemble.get_face_embeddings_batch(batch)
    averaged = {model: np.mean(embeddings[model], axis=0) for model in ('facenet', 'arc')}
    # Query at least 50 neighbours so the ensemble has enough candidates,
    # even when the caller asked for fewer final results.
    k = max(results, 50)
    model_predictions = {
        'facenet': data_manager.query_facenet_index(averaged['facenet'], k),
        'arc': data_manager.query_arc_index(averaged['arc'], k),
    }
    return ensemble.ensemble_prediction(model_predictions)
def image_search_performers(image, data_manager, threshold=0.5, results=3):
    """
    Search for multiple performers in an image.

    Parameters:
        image: PIL Image object
        data_manager: DataManager instance
        threshold: Minimum detection confidence for a face to be included
        results: Number of results to return per face
    Returns:
        List of dictionaries with face image and performer information
    Raises:
        ValueError: if no faces are found in the image
    """
    image_array = np.array(image)
    ensemble = EnsembleFaceRecognition({"facenet": 1.0, "arc": 1.0})
    try:
        faces = extract_faces(image_array)
    except ValueError as exc:
        # Re-raise with a user-facing message, keeping the original cause.
        raise ValueError("No faces found") from exc
    response = []
    for face in faces:
        # BUGFIX: `threshold` was previously accepted but silently ignored.
        # Filter detections below it, matching the confidence filtering done
        # in find_faces_in_sprite.
        if face['confidence'] < threshold:
            continue
        predictions = get_face_predictions(face['face'], ensemble, data_manager, results)
        # Crop the detected face out of the source image and encode it as
        # base64 JPEG for the API response.
        area = face['facial_area']
        cimage = image.crop((area['x'], area['y'], area['x'] + area['w'], area['y'] + area['h']))
        buf = io.BytesIO()
        cimage.save(buf, format='JPEG')
        im_b64 = base64.b64encode(buf.getvalue()).decode('ascii')
        # Resolve each predicted name to full performer metadata; predictions
        # with no known performer info are dropped.
        performers = []
        for name, confidence in predictions:
            performer_info = data_manager.get_performer_info(data_manager.faces[name], confidence)
            if performer_info:
                performers.append(performer_info)
        response.append({
            'image': im_b64,
            'area': area,
            'confidence': face['confidence'],
            'performers': performers
        })
    return response
def find_faces_in_sprite(image, vtt_file):
    """
    Find faces in a sprite sheet using VTT cue offsets.

    Parameters:
        image: Sprite image as a numpy array or PIL Image
        vtt_file: File object (with a .name attribute) containing VTT data
    Returns:
        List of dictionaries, one per sprite tile containing a face:
        {'id', 'offset', 'frame', 'time', 'size'}
    """
    # Read the VTT as text (normalizing newlines) and hand UTF-8 bytes to the
    # parser, preserving the original read-then-encode behavior.
    with open(vtt_file.name, 'r', encoding='utf-8') as f:
        vtt = f.read().encode('utf-8')
    # BUGFIX: the docstring promised a PIL Image but fromarray requires an
    # ndarray; np.asarray is a no-op for arrays and converts PIL Images.
    sprite = PILImage.fromarray(np.asarray(image))
    results = []
    # NOTE(review): the crop arithmetic (x..x+w, y..y+h) implies the parser
    # yields (x, y, width, height, time); the old names right/bottom were
    # misleading — confirm against parse_vtt_offsets.
    for frame_idx, (x, y, w, h, time_seconds) in enumerate(parse_vtt_offsets(vtt)):
        tile = sprite.crop((x, y, x + w, y + h))
        faces = extract_faces_mediapipe(np.asarray(tile), enforce_detection=False, align=False)
        # Keep only confident detections.
        faces = [face for face in faces if face['confidence'] > 0.6]
        if faces:
            area = faces[0]['facial_area']
            results.append({
                'id': str(uuid4()),
                'offset': (x, y, w, h),
                'frame': frame_idx,
                'time': time_seconds,
                'size': area['w'] * area['h'],
            })
    # BUGFIX: the visible original built `results` but never returned it.
    return results