Spaces:

Koottu
/

FaceMatch-Azure-Dev

Runtime error

App Files Files Community

ashutosh-koottu committed on Jan 19

Commit

a1075ab

1 Parent(s): a54e757

Adds caching and parallel processing

Browse files

Files changed (1) hide show

handler.py +99 -58

handler.py CHANGED Viewed

@@ -15,6 +15,7 @@ from PIL import Image
 from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
 from config import get_config
 import time
 class EndpointHandler:
     def __init__(self, model_dir=None):
@@ -45,6 +46,12 @@ class EndpointHandler:
         # Get container client
         self.container_client = self.blob_service_client.get_container_client(self.container_name)
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         try:
@@ -79,7 +86,15 @@ class EndpointHandler:
             raise ValueError("Invalid JSON structure.")
     def load_embeddings_from_azure(self):
-        """Load existing embeddings from Azure Blob Storage if they exist, else return an empty list."""
         try:
             # Check if embeddings file exists in Azure - look in profile-media/embeddings/
             blob_name = f'profile-media/embeddings/embeddings_db.json'
@@ -94,9 +109,14 @@ class EndpointHandler:
                 download_file.write(download_stream.readall())
             with open(temp_file_path, 'r') as f:
-                return json.load(f)
         except Exception as e:
             print(f'Embeddings file not found in Azure, initializing a new one: {e}')
             return []
     def extract_and_save_embeddings(self):
@@ -225,66 +245,57 @@ class EndpointHandler:
         print(f"Debug: Starting similarity search with {len(query_images)} query images")
         print(f"Debug: Looking for gender: {gender}, top_n: {top_n}")
         similarities = {}
         for i, image_input in enumerate(query_images):
-            print(f"Debug: Processing query image {i+1}/{len(query_images)}: {image_input}")
             try:
-                # Determine the type of image input
-                if image_input.startswith('http'):
-                    # It's a URL
-                    img = self.load_image_from_url(image_input)
-                elif image_input.startswith('data:image/'):
-                    # It's a base64-encoded image
-                    img = self.load_image_from_base64(image_input)
-                else:
-                    # It's a local file path reference - convert to full Azure blob URL
-                    blob_url = f"https://koottuprod.blob.core.windows.net/koottu-media/{image_input}"
-                    img = self.load_image_from_url(blob_url)
-                if img is None:
-                    print(f"Failed to load image: {image_input}")
-                    continue
-                faces = self.app.get(img)
-                if len(faces) == 0:
-                    print(f"Debug: No faces detected in query image {i+1}")
-                    continue
-                query_embedding = faces[0].embedding
-                print(f"Debug: Successfully extracted face embedding from query image {i+1}")
-                # Load embeddings database from Azure
-                embeddings_db = self.load_embeddings_from_azure()
-                print(f"Debug: Total embeddings in database: {len(embeddings_db)}")
-                # Filter to only include images from profile-media folder structure
-                profile_media_db = [item for item in embeddings_db if 'image_url' in item and 'profile-media' in item['image_url']]
-                print(f"Debug: Profile-media embeddings: {len(profile_media_db)}")
-                # Filter by gender: if 'all', include all items with gender field; otherwise filter by specific gender
-                if gender == 'all':
-                    filtered_db = [item for item in profile_media_db if 'gender' in item and 'embedding' in item]
-                else:
-                    filtered_db = [item for item in profile_media_db if 'gender' in item and item['gender'] == gender and 'embedding' in item]
-                print(f"Debug: Filtered by gender '{gender}': {len(filtered_db)}")
-                if len(filtered_db) == 0:
-                    print(f"Debug: No embeddings found for gender '{gender}' in profile-media folder")
-                    print(f"Debug: Available genders in profile-media: {list(set([item.get('gender') for item in profile_media_db if 'gender' in item]))}")
-                    continue
-                for item in filtered_db:
-                    similarity = 1 - cosine(query_embedding, np.array(item['embedding']))
-                    if item['image_url'] in similarities:
-                        similarities[item['image_url']].append(similarity)
-                    else:
-                        similarities[item['image_url']] = [similarity]
             except Exception as e:
-                error_message = f"Error processing image input: {e}"
-                print(error_message)
-                # Return empty list instead of error dict
-                return []
         # Aggregate similarities
         print(f"Debug: Total similarities found: {len(similarities)}")
@@ -293,6 +304,36 @@ class EndpointHandler:
         result = [url for _, url in aggregated_similarities[:top_n]]
         print(f"Debug: Returning {len(result)} recommendations")
         return result
     def find_similar_images_by_embedding(self, query_embedding: np.ndarray, gender: str = 'all', top_n: int = 10, excluded_images: List[str] = None) -> List[str]:
         """Find similar images based on a given embedding vector."""

 from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
 from config import get_config
 import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 class EndpointHandler:
     def __init__(self, model_dir=None):
         # Get container client
         self.container_client = self.blob_service_client.get_container_client(self.container_name)
+        # Initialize caching
+        self.embeddings_cache = None
+        self.cache_timestamp = 0
+        self.cache_ttl = 3600  # 1 hour in seconds
+        self.thread_pool = ThreadPoolExecutor(max_workers=4)
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         try:
             raise ValueError("Invalid JSON structure.")
     def load_embeddings_from_azure(self):
+        """Load existing embeddings from Azure Blob Storage with caching."""
+        current_time = time.time()
+        # Return cached embeddings if still valid
+        if self.embeddings_cache is not None and (current_time - self.cache_timestamp) < self.cache_ttl:
+            print(f"Using cached embeddings (age: {int(current_time - self.cache_timestamp)}s)")
+            return self.embeddings_cache
+        print("Fetching embeddings from Azure...")
         try:
             # Check if embeddings file exists in Azure - look in profile-media/embeddings/
             blob_name = f'profile-media/embeddings/embeddings_db.json'
                 download_file.write(download_stream.readall())
             with open(temp_file_path, 'r') as f:
+                self.embeddings_cache = json.load(f)
+                self.cache_timestamp = current_time
+                print(f"Loaded {len(self.embeddings_cache)} embeddings from Azure")
+                return self.embeddings_cache
         except Exception as e:
             print(f'Embeddings file not found in Azure, initializing a new one: {e}')
+            self.embeddings_cache = []
+            self.cache_timestamp = current_time
             return []
     def extract_and_save_embeddings(self):
         print(f"Debug: Starting similarity search with {len(query_images)} query images")
         print(f"Debug: Looking for gender: {gender}, top_n: {top_n}")
+        # Load embeddings database once (cached)
+        embeddings_db = self.load_embeddings_from_azure()
+        print(f"Debug: Total embeddings in database: {len(embeddings_db)}")
+        # Filter to only include images from profile-media folder structure
+        profile_media_db = [item for item in embeddings_db if 'image_url' in item and 'profile-media' in item['image_url']]
+        print(f"Debug: Profile-media embeddings: {len(profile_media_db)}")
+        # Filter by gender: if 'all', include all items with gender field; otherwise filter by specific gender
+        if gender == 'all':
+            filtered_db = [item for item in profile_media_db if 'gender' in item and 'embedding' in item]
+        else:
+            filtered_db = [item for item in profile_media_db if 'gender' in item and item['gender'] == gender and 'embedding' in item]
+        print(f"Debug: Filtered by gender '{gender}': {len(filtered_db)}")
+        if len(filtered_db) == 0:
+            print(f"Debug: No embeddings found for gender '{gender}' in profile-media folder")
+            return []
+        # Process query images in parallel
         similarities = {}
+        futures = {}
         for i, image_input in enumerate(query_images):
+            future = self.thread_pool.submit(self._extract_query_embedding, image_input, i)
+            futures[future] = i
+        # Collect results from parallel processing
+        query_embeddings = []
+        for future in as_completed(futures):
+            i = futures[future]
             try:
+                query_embedding = future.result()
+                if query_embedding is not None:
+                    query_embeddings.append(query_embedding)
+                    print(f"Debug: Successfully extracted face embedding from query image {i+1}")
             except Exception as e:
+                print(f"Debug: Error processing query image {i+1}: {e}")
+        if not query_embeddings:
+            print("Debug: No valid query embeddings extracted")
+            return []
+        # Compute similarities for all query embeddings against filtered database
+        for query_embedding in query_embeddings:
+            for item in filtered_db:
+                similarity = 1 - cosine(query_embedding, np.array(item['embedding']))
+                if item['image_url'] in similarities:
+                    similarities[item['image_url']].append(similarity)
+                else:
+                    similarities[item['image_url']] = [similarity]
         # Aggregate similarities
         print(f"Debug: Total similarities found: {len(similarities)}")
         result = [url for _, url in aggregated_similarities[:top_n]]
         print(f"Debug: Returning {len(result)} recommendations")
         return result
+    def _extract_query_embedding(self, image_input: str, index: int) -> Any:
+        """Extract embedding from a single query image (for parallel processing)."""
+        try:
+            print(f"Debug: Processing query image {index+1}: {image_input}")
+            # Determine the type of image input
+            if image_input.startswith('http'):
+                # It's a URL
+                img = self.load_image_from_url(image_input)
+            elif image_input.startswith('data:image/'):
+                # It's a base64-encoded image
+                img = self.load_image_from_base64(image_input)
+            else:
+                # It's a local file path reference - convert to full Azure blob URL
+                blob_url = f"https://koottuprod.blob.core.windows.net/koottu-media/{image_input}"
+                img = self.load_image_from_url(blob_url)
+            if img is None:
+                print(f"Failed to load image: {image_input}")
+                return None
+            faces = self.app.get(img)
+            if len(faces) == 0:
+                print(f"Debug: No faces detected in query image {index+1}")
+                return None
+            return faces[0].embedding
+        except Exception as e:
+            print(f"Error extracting embedding from image {index+1}: {e}")
+            return None
     def find_similar_images_by_embedding(self, query_embedding: np.ndarray, gender: str = 'all', top_n: int = 10, excluded_images: List[str] = None) -> List[str]:
         """Find similar images based on a given embedding vector."""