jichao committed on
Commit
87188fa
·
1 Parent(s): 0e5bea3
Files changed (2) hide show
  1. app.py +203 -185
  2. requirements.txt +4 -7
app.py CHANGED
@@ -1,209 +1,227 @@
1
  import gradio as gr
2
  import torch
3
  import timm
4
- import numpy as np
5
  from PIL import Image
6
- import torchvision.transforms as transforms
7
  import os
8
 
9
- # Define available models
10
- MODELS = {
11
- "mars-vit-b-0217": {
12
- "path": os.path.join('models', 'checkpint-300.pth'),
13
- "architecture": 'vit_base_patch16_224',
14
- "img_size": 224,
15
  "in_chans": 1,
16
- "mean": [0.5],
17
- "std": [0.25]
18
- }
19
- # Add more models here in the future
 
 
 
 
 
20
  }
21
 
22
- # Default model
23
- DEFAULT_MODEL = "mars-vit-b-0217"
24
 
25
- # Model cache to avoid reloading
26
- loaded_models = {}
 
 
 
27
 
28
- def get_transform(model_name):
29
- """Get the appropriate transform for the model"""
30
- model_config = MODELS.get(model_name, MODELS[DEFAULT_MODEL])
31
- return transforms.Compose([
32
- transforms.Resize((model_config["img_size"], model_config["img_size"])),
33
- transforms.Grayscale(), # Convert to grayscale (1 channel)
34
- transforms.ToTensor(),
35
- transforms.Normalize(mean=model_config["mean"], std=model_config["std"])
36
- ])
37
 
38
- def load_model(model_name):
39
- """Load the specified model"""
40
- if model_name in loaded_models:
41
- return loaded_models[model_name]
42
-
43
- model_config = MODELS.get(model_name, MODELS[DEFAULT_MODEL])
44
-
45
  model = timm.create_model(
46
- model_config["architecture"],
47
- img_size=model_config["img_size"],
48
- in_chans=model_config["in_chans"],
49
- num_classes=0, # no head
50
- global_pool='', # no pooling
 
51
  )
52
 
53
- # Load converted weights
54
- checkpoint = torch.load(model_config["path"], map_location='cpu', weights_only=False)
55
- msg = model.load_state_dict(checkpoint['state_dict'], strict=False)
56
- print(f"Loaded {model_name} weights with message: {msg}")
57
-
58
- model.eval() # Set model to evaluation mode
59
- loaded_models[model_name] = model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  return model
61
 
62
- # Load the default model at startup
63
- default_model = load_model(DEFAULT_MODEL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- def get_embedding(image, model_name=DEFAULT_MODEL):
66
- """Calculate embedding for an image using the specified model"""
67
- if image is None:
68
- return None, "No image provided"
69
-
70
  try:
71
- # Get the model
72
- model = load_model(model_name)
73
-
74
- # Convert to PIL Image if it's not already
75
- if not isinstance(image, Image.Image):
76
- image = Image.fromarray(image)
77
-
78
- # Apply transformations
79
- transform = get_transform(model_name)
80
- img_tensor = transform(image).unsqueeze(0) # Add batch dimension
81
-
82
- # Get embedding
83
  with torch.no_grad():
84
- embedding = model(img_tensor)
85
-
86
- # Convert to numpy and normalize
87
- embedding_np = embedding.squeeze().cpu().numpy()
88
-
89
- # Normalize embedding to unit length
90
- embedding_norm = embedding_np / np.linalg.norm(embedding_np)
91
-
92
- return embedding_norm, f"Embedding calculated successfully using {model_name}"
 
 
 
 
 
 
 
 
 
 
 
 
93
  except Exception as e:
94
- return None, f"Error calculating embedding: {str(e)}"
95
-
96
- def process_image(image, model_name=DEFAULT_MODEL):
97
- """Process image and return embedding with visualization"""
98
- embedding, message = get_embedding(image, model_name)
99
-
100
- if embedding is None:
101
- return None, None, message, None
102
-
103
- # Create a simple visualization of the embedding (first 100 values)
104
- import matplotlib.pyplot as plt
105
- plt.figure(figsize=(10, 4))
106
- plt.bar(range(min(100, len(embedding))), embedding[:100])
107
- plt.title(f"Embedding Visualization ({model_name}, first 100 dimensions)")
108
- plt.xlabel("Dimension")
109
- plt.ylabel("Value")
110
-
111
- # Save the plot to a temporary file
112
- vis_path = "embedding_vis.png"
113
- plt.savefig(vis_path)
114
- plt.close()
115
-
116
- # Return the processed image, embedding visualization, and message
117
- return image, vis_path, message, embedding.tolist()
118
-
119
- # Define API endpoint function
120
- def api_predict(image, model_name=DEFAULT_MODEL):
121
- embedding, message = get_embedding(image, model_name)
122
- if embedding is None:
123
- return {"embedding": None, "message": message, "model_name": model_name}
124
- return {"embedding": embedding.tolist(), "message": message, "model_name": model_name}
125
-
126
- # Set up the Gradio interface with API
127
- demo = gr.Blocks()
128
-
129
- with demo:
130
- gr.Markdown("# Image Embedding Calculator")
131
- gr.Markdown("Upload an image to calculate its embedding vector using a Vision Transformer model")
132
-
133
- with gr.Tab("Interactive Demo"):
134
- with gr.Row():
135
- with gr.Column():
136
- input_image = gr.Image(type="pil", label="Input Image")
137
- model_dropdown = gr.Dropdown(
138
- choices=list(MODELS.keys()),
139
- value=DEFAULT_MODEL,
140
- label="Model"
141
- )
142
- submit_btn = gr.Button("Calculate Embedding")
143
-
144
- with gr.Column():
145
- output_image = gr.Image(type="pil", label="Processed Image")
146
- output_vis = gr.Image(type="filepath", label="Embedding Visualization")
147
- output_message = gr.Textbox(label="Status")
148
- output_embedding = gr.JSON(label="Embedding Vector")
149
-
150
- submit_btn.click(
151
- fn=process_image,
152
- inputs=[input_image, model_dropdown],
153
- outputs=[output_image, output_vis, output_message, output_embedding]
154
- )
155
-
156
- with gr.Tab("API Documentation"):
157
- gr.Markdown("""
158
- ## API Usage
159
-
160
- This application provides an API endpoint for calculating image embeddings.
161
-
162
- ### Endpoint: `/api/predict`
163
-
164
- **Method**: POST
165
-
166
- **Input**:
167
- - `image`: An image file
168
- - `model_name`: (Optional) Name of the model to use (default: "mars-vit-b-0217")
169
-
170
- **Output**:
171
- ```json
172
- {
173
- "embedding": [...], // The embedding vector
174
- "message": "Status message",
175
- "model_name": "mars-vit-b-0217" // The model used
176
  }
177
- ```
178
-
179
- ### Example using Python requests:
180
- ```python
181
- import requests
182
-
183
- response = requests.post(
184
- "https://yourusername-embedding-helper.hf.space/api/predict",
185
- files={"image": open("your_image.jpg", "rb")},
186
- data={"model_name": "mars-vit-b-0217"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  )
188
-
189
- result = response.json()
190
- embedding = result["embedding"]
191
- ```
192
- """)
193
-
194
- # Create the API endpoint
195
- api_predict_interface = gr.Interface(
196
- fn=api_predict,
197
- inputs=[
198
- gr.Image(type="pil"),
199
- gr.Textbox(default=DEFAULT_MODEL, label="Model Name")
200
- ],
201
- outputs=gr.JSON(),
202
- title="Image Embedding API",
203
- description="API for calculating image embeddings",
204
- allow_flagging="never"
205
- )
206
-
207
- # Launch the app with the API
208
  if __name__ == "__main__":
209
- demo.launch(share=False)
 
1
  import gradio as gr
2
  import torch
3
  import timm
4
+ from torchvision import transforms
5
  from PIL import Image
6
+ import numpy as np
7
  import os
8
 
9
# --- Model Configuration ---
DEFAULT_MODEL_NAME = "mars-ctx-vitb-0217"

# Registry of available checkpoints: maps a user-facing model name to its
# checkpoint path, timm architecture id, expected input channel count, and a
# short human-readable description.
MODEL_CONFIGS = {
    "mars-ctx-vitb-0217": {
        "path": "models/checkpoint-300.pth",
        "timm_id": "vit_base_patch16_224",
        "in_chans": 1,
        "description": "ViT-Base/16 (Grayscale Input)"
    },
    # --- Add more model configurations here ---
    # "another_model_name": {
    #     "path": "models/another_checkpoint.pth",
    #     "timm_id": "vit_small_patch16_224",
    #     "in_chans": 3,  # Example: RGB model
    #     "description": "ViT-Small/16 (RGB Input)"
    # },
}

# Cache of instantiated models keyed by model name, so each checkpoint is
# loaded from disk at most once per process.
LOADED_MODELS = {}
29
 
30
# --- Model Loading Function ---
def load_model(model_name: str):
    """Build the timm backbone for ``model_name`` and load its checkpoint.

    Looks up the configuration in ``MODEL_CONFIGS``, constructs the bare
    backbone (no classification head, no pooling), and loads weights from the
    configured checkpoint if it exists. Falls back to randomly initialized
    weights when the checkpoint is missing or fails to load, so the app can
    still start.

    Args:
        model_name: Key into ``MODEL_CONFIGS``.

    Returns:
        The model, set to eval mode.

    Raises:
        ValueError: If ``model_name`` is not a known configuration.
    """
    if model_name not in MODEL_CONFIGS:
        raise ValueError(f"Unknown model name: {model_name}")

    config = MODEL_CONFIGS[model_name]
    model_path = config["path"]
    timm_id = config["timm_id"]
    in_chans = config.get("in_chans", 3)  # Default to 3 channels if not specified
    # Generalized: honor an optional per-model input resolution; 224 matches
    # the vit_*_224 architectures used so far, so behavior is unchanged for
    # existing configs.
    img_size = config.get("img_size", 224)

    print(f"Loading model: {model_name} ({timm_id}) from {model_path}")

    model = timm.create_model(
        timm_id,
        img_size=img_size,
        in_chans=in_chans,
        num_classes=0,    # No classification head
        global_pool='',   # No pooling - we want the CLS token feature
        pretrained=False  # Don't load timm pretrained weights, we use our checkpoint
    )

    # Ensure the directory exists before checking the file
    model_dir = os.path.dirname(model_path)
    if model_dir and not os.path.exists(model_dir):
        print(f"Creating directory: {model_dir}")
        os.makedirs(model_dir, exist_ok=True)

    if not os.path.exists(model_path):
        print(f"Warning: Model checkpoint not found at {model_path}. Using random weights for {model_name}.")
        model.eval()  # Still set to eval mode
        return model  # Return untrained model if checkpoint missing

    try:
        # NOTE(review): weights_only=False unpickles arbitrary objects — only
        # load checkpoints from trusted sources.
        checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
        # The checkpoint may wrap the weights under 'state_dict' or be a raw
        # state dict itself.
        state_dict = checkpoint.get('state_dict', checkpoint)
        # strict=False tolerates head/pooling keys that this bare backbone
        # does not have.
        msg = model.load_state_dict(state_dict, strict=False)
        print(f"Loaded weights for {model_name} from {model_path}. Load message: {msg}")
        if msg.missing_keys or msg.unexpected_keys:
            print(f"Note: There were missing or unexpected keys during weight loading for {model_name}. Check compatibility.")

    except Exception as e:
        print(f"Error loading checkpoint for {model_name} from {model_path}: {e}")
        print(f"Proceeding with randomly initialized weights for {model_name}.")

    model.eval()  # Set model to evaluation mode
    return model
78
 
79
# --- Pre-load Default Model --- (Or load on demand in get_embedding)
try:
    print(f"Pre-loading default model: {DEFAULT_MODEL_NAME}...")
    LOADED_MODELS[DEFAULT_MODEL_NAME] = load_model(DEFAULT_MODEL_NAME)
    print(f"Default model {DEFAULT_MODEL_NAME} loaded successfully.")
except Exception as e:
    # Startup deliberately continues so the UI can come up; requests that need
    # the default model will surface their own errors later.
    print(f"ERROR: Failed to pre-load default model {DEFAULT_MODEL_NAME}: {e}")
89
# --- Image Preprocessing --- (Now depends on model input channels)
def get_preprocess(model_name: str):
    """Return the torchvision preprocessing pipeline for ``model_name``.

    Resizes to the model's input resolution, converts to grayscale when the
    model expects a single channel, then tensorizes and normalizes with a
    per-channel mean of 0.5 and std of 0.25. Unknown model names fall back to
    the default model's configuration.
    """
    config = MODEL_CONFIGS.get(model_name, MODEL_CONFIGS[DEFAULT_MODEL_NAME])  # Fallback to default
    in_chans = config.get('in_chans', 3)
    # Generalized: honor an optional per-model input resolution; defaults to
    # 224 so existing configs behave exactly as before.
    img_size = config.get('img_size', 224)
    mean = [0.5] * in_chans
    std = [0.25] * in_chans  # Assuming same normalization for now

    transforms_list = [
        transforms.Resize((img_size, img_size)),
    ]
    if in_chans == 1:
        transforms_list.append(transforms.Grayscale(num_output_channels=1))

    transforms_list.extend([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])
    return transforms.Compose(transforms_list)
108
+
109
# --- Embedding Function ---
def get_embedding(image_pil: Image.Image, model_name: str) -> dict:
    """Compute the normalized CLS-token embedding of an image.

    Preprocesses the image for the selected model, extracts features, takes
    the CLS token, L2-normalizes it, and returns a dict with keys
    ``model_name``, ``data`` (list of floats, or None on failure), and
    ``message`` (status text).
    """
    def _result(data, message):
        # Uniform response payload for both success and error paths.
        return {
            "model_name": model_name,
            "data": data,
            "message": message
        }

    if image_pil is None:
        return _result(None, "Error: Please upload an image.")
    if model_name not in MODEL_CONFIGS:
        return _result(None, f"Error: Unknown model name '{model_name}'.")

    # --- Get the model (load lazily if it was not pre-loaded) ---
    if model_name not in LOADED_MODELS:
        try:
            print(f"Loading model {model_name} on demand...")
            LOADED_MODELS[model_name] = load_model(model_name)
            print(f"Model {model_name} loaded successfully.")
        except Exception as e:
            print(f"Error loading model {model_name}: {e}")
            return _result(None, f"Error loading model '{model_name}'. Check logs.")

    selected_model = LOADED_MODELS[model_name]
    preprocess = get_preprocess(model_name)

    try:
        # Preprocess based on the selected model's requirements.
        img_tensor = preprocess(image_pil).unsqueeze(0)  # Add batch dimension [1, C, H, W]

        with torch.no_grad():
            features = selected_model.forward_features(img_tensor)
            if isinstance(features, tuple):
                features = features[0]
            if len(features.shape) == 3:
                # Token sequence [B, N, D]: position 0 is the CLS token.
                cls_embedding = features[:, 0]
            else:
                print(f"Warning: Unexpected feature shape for {model_name}: {features.shape}. Attempting to use as is.")
                cls_embedding = features

        # L2-normalize so downstream cosine similarity reduces to a dot product.
        normalized_embedding = torch.nn.functional.normalize(cls_embedding, p=2, dim=1)

        embedding_list = normalized_embedding.squeeze().cpu().numpy().tolist()
        if not isinstance(embedding_list, list):
            embedding_list = [embedding_list]  # Ensure it's always a list

        return _result(embedding_list, "Success")

    except Exception as e:
        print(f"Error processing image with model {model_name}: {e}")
        import traceback
        traceback.print_exc()  # Print detailed traceback to logs
        return _result(None, f"Error processing image with model '{model_name}'. Check logs for details.")
181
+
182
# --- Gradio Interface ---
EXAMPLE_DIR = "examples"
EXAMPLE_IMAGE = os.path.join(EXAMPLE_DIR, "sample_image.png")
os.makedirs(EXAMPLE_DIR, exist_ok=True)
# Only offer examples when the sample file is actually present on disk.
examples = [[EXAMPLE_IMAGE, DEFAULT_MODEL_NAME]] if os.path.exists(EXAMPLE_IMAGE) else None

# Dropdown choices come straight from the model registry.
model_choices = list(MODEL_CONFIGS.keys())

with gr.Blocks() as iface:
    gr.Markdown("## Image Embedding Calculator")
    gr.Markdown("Upload an image and select a model to calculate its normalized CLS token embedding.")

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Image")
            model_selector = gr.Dropdown(
                choices=model_choices,
                value=DEFAULT_MODEL_NAME,
                label="Select Model"
            )
            submit_btn = gr.Button("Calculate Embedding")
        with gr.Column(scale=2):
            # JSON output carries the embedding plus model/status metadata.
            output_embedding = gr.JSON(label="Output (Embedding & Info)")

    if examples:
        gr.Examples(
            examples=examples,
            inputs=[input_image, model_selector],
            outputs=output_embedding,
            fn=get_embedding,
            cache_examples=False  # Recompute each time; could be True for static inputs
        )

    # The click handler doubles as the public API endpoint (/predict).
    submit_btn.click(
        fn=get_embedding,
        inputs=[input_image, model_selector],
        outputs=output_embedding,
        api_name="predict"
    )

# --- Launch the App ---
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0")
requirements.txt CHANGED
@@ -1,7 +1,4 @@
1
- torch>=2.0.0
2
- torchvision>=0.10.0
3
- timm>=1.0.0
4
- gradio
5
- numpy<2.0.0
6
- Pillow>=8.3.1
7
- matplotlib>=3.5.0
 
1
+ torch
2
+ timm
3
+ torchvision
4
+ Pillow