jichao committed on
Commit
f46dce8
·
1 Parent(s): 8dbe4eb

add pooling

Browse files
Files changed (1) hide show
  1. app.py +84 -26
app.py CHANGED
@@ -107,19 +107,21 @@ def get_preprocess(model_name: str):
107
  return transforms.Compose(transforms_list)
108
 
109
  # --- Embedding Function ---
110
- def get_embedding(image_pil: Image.Image, model_name: str) -> dict:
111
- """Preprocesses an image, extracts the CLS token embedding for the selected model,
112
- normalizes it, and returns a dictionary containing model info, embedding data (or null),
113
- and a status message."""
114
  if image_pil is None:
115
  return {
116
  "model_name": model_name,
 
117
  "data": None,
118
  "message": "Error: Please upload an image."
119
  }
120
  if model_name not in MODEL_CONFIGS:
121
  return {
122
  "model_name": model_name,
 
123
  "data": None,
124
  "message": f"Error: Unknown model name '{model_name}'."
125
  }
@@ -135,6 +137,7 @@ def get_embedding(image_pil: Image.Image, model_name: str) -> dict:
135
  print(f"Error loading model {model_name}: {e}")
136
  return {
137
  "model_name": model_name,
 
138
  "data": None,
139
  "message": error_msg
140
  }
@@ -148,15 +151,56 @@ def get_embedding(image_pil: Image.Image, model_name: str) -> dict:
148
 
149
  with torch.no_grad():
150
  features = selected_model.forward_features(img_tensor)
 
 
 
151
  if isinstance(features, tuple):
152
- features = features[0]
153
- if len(features.shape) == 3:
154
- cls_embedding = features[:, 0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  else:
156
- print(f"Warning: Unexpected feature shape for {model_name}: {features.shape}. Attempting to use as is.")
157
- cls_embedding = features
 
 
 
 
 
 
158
 
159
- normalized_embedding = torch.nn.functional.normalize(cls_embedding, p=2, dim=1)
 
160
 
161
  embedding_list = normalized_embedding.squeeze().cpu().numpy().tolist()
162
  if not isinstance(embedding_list, list):
@@ -164,17 +208,19 @@ def get_embedding(image_pil: Image.Image, model_name: str) -> dict:
164
 
165
  return {
166
  "model_name": model_name,
 
167
  "data": embedding_list,
168
  "message": "Success"
169
  }
170
 
171
  except Exception as e:
172
- error_msg = f"Error processing image with model '{model_name}'. Check logs for details."
173
- print(f"Error processing image with model {model_name}: {e}")
174
  import traceback
175
  traceback.print_exc() # Print detailed traceback to logs
176
  return {
177
  "model_name": model_name,
 
178
  "data": None,
179
  "message": error_msg
180
  }
@@ -188,9 +234,13 @@ examples = [[EXAMPLE_IMAGE, DEFAULT_MODEL_NAME]] if os.path.exists(EXAMPLE_IMAGE
188
  # Get list of model names for dropdown
189
  model_choices = list(MODEL_CONFIGS.keys())
190
 
 
 
 
 
191
  with gr.Blocks() as iface:
192
  gr.Markdown("## Image Embedding Calculator")
193
- gr.Markdown("Upload an image and select a model to calculate its normalized CLS token embedding.")
194
 
195
  with gr.Row():
196
  with gr.Column(scale=1):
@@ -200,28 +250,36 @@ with gr.Blocks() as iface:
200
  value=DEFAULT_MODEL_NAME,
201
  label="Select Model"
202
  )
203
- submit_btn = gr.Button("Calculate Embedding")
 
 
 
 
 
 
 
204
  with gr.Column(scale=2):
205
- # Change output component to JSON
206
- output_embedding = gr.JSON(label="Output (Embedding & Info)")
207
 
208
  if examples:
 
 
 
209
  gr.Examples(
210
- examples=examples,
211
- inputs=[input_image, model_selector],
212
- outputs=output_embedding,
213
  fn=get_embedding,
214
- cache_examples=False # Recompute if necessary, maybe True if inputs are static
215
  )
216
 
217
- # Connect the button click to the function
218
- submit_btn.click(
219
  fn=get_embedding,
220
- inputs=[input_image, model_selector],
221
- outputs=output_embedding,
222
- api_name="predict" # Expose API endpoint
223
  )
224
 
225
- # --- Launch the App ---
226
  if __name__ == "__main__":
227
  iface.launch(server_name="0.0.0.0")
 
107
  return transforms.Compose(transforms_list)
108
 
109
  # --- Embedding Function ---
110
+ def get_embedding(image_pil: Image.Image, model_name: str, embedding_method: str = 'cls') -> dict:
111
+ """Preprocesses an image, extracts embedding using the specified method for the
112
+ selected model, normalizes it, and returns a dictionary containing model info,
113
+ embedding data (or null), and a status message."""
114
  if image_pil is None:
115
  return {
116
  "model_name": model_name,
117
+ "embedding_method": embedding_method,
118
  "data": None,
119
  "message": "Error: Please upload an image."
120
  }
121
  if model_name not in MODEL_CONFIGS:
122
  return {
123
  "model_name": model_name,
124
+ "embedding_method": embedding_method,
125
  "data": None,
126
  "message": f"Error: Unknown model name '{model_name}'."
127
  }
 
137
  print(f"Error loading model {model_name}: {e}")
138
  return {
139
  "model_name": model_name,
140
+ "embedding_method": embedding_method,
141
  "data": None,
142
  "message": error_msg
143
  }
 
151
 
152
  with torch.no_grad():
153
  features = selected_model.forward_features(img_tensor)
154
+ # features shape typically [batch_size, sequence_length, embedding_dim]
155
+ # For ViT, sequence_length = num_patches + 1 (CLS token)
156
+
157
  if isinstance(features, tuple):
158
+ features = features[0] # Handle models returning tuples
159
+
160
+ if len(features.shape) == 3: # Expected shape [B, N, D]
161
+ if embedding_method == 'cls':
162
+ embedding = features[:, 0] # Use the CLS token
163
+ print(f"Using CLS token embedding for {model_name}.")
164
+ elif embedding_method == 'mean pooling':
165
+ # Mean pool patch tokens (excluding CLS token)
166
+ embedding = features[:, 1:].mean(dim=1)
167
+ print(f"Using mean pooling embedding for {model_name}.")
168
+ elif embedding_method == 'gem pooling':
169
+ # GeM pooling (Generalized Mean) - pool patch tokens
170
+ p = 3.0
171
+ patch_tokens = features[:, 1:] # Shape [B, num_patches, D]
172
+
173
+ if patch_tokens.shape[1] == 0: # Check if there are any patch tokens
174
+ print(f"Warning: No patch tokens found for GeM pooling in {model_name}. Falling back to CLS token.")
175
+ embedding = features[:, 0] # Fallback to CLS
176
+ else:
177
+ # Ensure non-negativity before power + epsilon
178
+ patch_tokens_non_negative = torch.relu(patch_tokens) + 1e-6
179
+ # Calculate GeM
180
+ embedding = torch.mean(patch_tokens_non_negative**p, dim=1)**(1./p)
181
+ print(f"Using GeM pooling (p={p}) embedding for {model_name}.")
182
+
183
+ else:
184
+ # Default or fallback to CLS if method is unknown
185
+ print(f"Warning: Unknown embedding method '{embedding_method}'. Defaulting to CLS.")
186
+ embedding = features[:, 0]
187
+ # Handle cases where forward_features might return a different shape
188
+ # (e.g., already pooled features [B, D])
189
+ elif len(features.shape) == 2:
190
+ print(f"Warning: Unexpected feature shape {features.shape} for {model_name}. Using features directly.")
191
+ embedding = features
192
  else:
193
+ # Handle other unexpected shapes if necessary
194
+ print(f"Error: Unexpected feature shape {features.shape} for {model_name}. Cannot extract embedding.")
195
+ return {
196
+ "model_name": model_name,
197
+ "embedding_method": embedding_method,
198
+ "data": None,
199
+ "message": f"Error: Unexpected feature output shape from model '{model_name}'. Check logs."
200
+ }
201
 
202
+
203
+ normalized_embedding = torch.nn.functional.normalize(embedding, p=2, dim=1)
204
 
205
  embedding_list = normalized_embedding.squeeze().cpu().numpy().tolist()
206
  if not isinstance(embedding_list, list):
 
208
 
209
  return {
210
  "model_name": model_name,
211
+ "embedding_method": embedding_method,
212
  "data": embedding_list,
213
  "message": "Success"
214
  }
215
 
216
  except Exception as e:
217
+ error_msg = f"Error processing image with model '{model_name}' ({embedding_method}). Check logs."
218
+ print(f"Error processing image with model {model_name} ({embedding_method}): {e}")
219
  import traceback
220
  traceback.print_exc() # Print detailed traceback to logs
221
  return {
222
  "model_name": model_name,
223
+ "embedding_method": embedding_method,
224
  "data": None,
225
  "message": error_msg
226
  }
 
234
  # Get list of model names for dropdown
235
  model_choices = list(MODEL_CONFIGS.keys())
236
 
237
+ # Add embedding method choices
238
+ embedding_method_choices = ['cls', 'mean pooling', 'gem pooling'] # Added 'gem pooling'
239
+ default_embedding_method = 'cls'
240
+
241
  with gr.Blocks() as iface:
242
  gr.Markdown("## Image Embedding Calculator")
243
+ gr.Markdown("Upload an image, select a model, and choose an embedding method to calculate the normalized embedding.") # Updated description
244
 
245
  with gr.Row():
246
  with gr.Column(scale=1):
 
250
  value=DEFAULT_MODEL_NAME,
251
  label="Select Model"
252
  )
253
+ # --- Add the new dropdown here ---
254
+ embedding_method_selector = gr.Dropdown(
255
+ choices=embedding_method_choices,
256
+ value=default_embedding_method,
257
+ label="Select Embedding Method"
258
+ )
259
+ # --- ---
260
+ submit_button = gr.Button("Calculate Embedding")
261
  with gr.Column(scale=2):
262
+ output_json = gr.JSON(label="Output Embedding (JSON)")
 
263
 
264
  if examples:
265
+ # Add default embedding method to examples if using them
266
+ # Now includes the new 'gem pooling' option potentially for examples
267
+ examples_with_method = [[ex[0], ex[1], default_embedding_method] for ex in examples] # Might need adjustment if you want different methods in examples
268
  gr.Examples(
269
+ examples=examples_with_method,
270
+ inputs=[input_image, model_selector, embedding_method_selector], # Already includes the selector
271
+ outputs=output_json,
272
  fn=get_embedding,
273
+ cache_examples=False # Caching might be tricky with model loading
274
  )
275
 
276
+ # Update the button click handler to include the new selector
277
+ submit_button.click(
278
  fn=get_embedding,
279
+ inputs=[input_image, model_selector, embedding_method_selector], # Pass the new selector's value
280
+ outputs=output_json
 
281
  )
282
 
283
+ # --- Launch the Gradio App ---
284
  if __name__ == "__main__":
285
  iface.launch(server_name="0.0.0.0")