jichao commited on
Commit
4a47660
·
1 Parent(s): b5fe4f8

first model added

Browse files
Files changed (2) hide show
  1. app.py +195 -4
  2. requirements.txt +7 -0
app.py CHANGED
@@ -1,7 +1,198 @@
1
  import gradio as gr
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ import timm
4
+ import numpy as np
5
+ from PIL import Image
6
+ import torchvision.transforms as transforms
7
+ import os
8
 
9
# Registry of available embedding models.
# Each entry maps a model name to its checkpoint path plus the timm
# architecture and preprocessing settings needed to rebuild/use it.
MODELS = {
    "mars-vit-b-0217": {
        # NOTE(review): "checkpint" looks like a typo for "checkpoint" —
        # confirm it matches the actual filename on disk before renaming.
        "path": os.path.join('models', 'checkpint-300.pth'),
        "architecture": 'vit_base_patch16_224',
        "img_size": 224,   # input resolution the ViT was trained at
        "in_chans": 1,     # single-channel (grayscale) input
        "mean": [0.5],     # normalization stats for the one channel
        "std": [0.25]
    }
    # Add more models here in the future
}

# Default model
DEFAULT_MODEL = "mars-vit-b-0217"

# Model cache to avoid reloading (name -> loaded eval-mode model)
loaded_models = {}
27
+
28
def get_transform(model_name):
    """Build the preprocessing pipeline for *model_name*.

    Unknown names fall back to the default model's configuration.
    """
    cfg = MODELS.get(model_name, MODELS[DEFAULT_MODEL])
    side = cfg["img_size"]
    steps = [
        transforms.Resize((side, side)),
        transforms.Grayscale(),  # collapse to a single channel
        transforms.ToTensor(),
        transforms.Normalize(mean=cfg["mean"], std=cfg["std"]),
    ]
    return transforms.Compose(steps)
37
+
38
def load_model(model_name):
    """Load (and cache) the timm model for *model_name*.

    Unknown names fall back to the default model's configuration. The model
    is returned in eval mode; repeated calls reuse the cached instance.
    """
    if model_name in loaded_models:
        return loaded_models[model_name]

    model_config = MODELS.get(model_name, MODELS[DEFAULT_MODEL])

    model = timm.create_model(
        model_config["architecture"],
        img_size=model_config["img_size"],
        in_chans=model_config["in_chans"],
        num_classes=0,   # no classification head — we want raw features
        global_pool='',  # no pooling — keep the full token sequence
    )

    # SECURITY: weights_only=False unpickles arbitrary objects; only ever
    # load checkpoints bundled with / trusted by this app.
    checkpoint = torch.load(model_config["path"], map_location='cpu', weights_only=False)
    # Accept both {"state_dict": ...} wrappers and bare state dicts —
    # the original indexed checkpoint['state_dict'] unconditionally and
    # raised KeyError on checkpoints saved as a plain state dict.
    if isinstance(checkpoint, dict):
        state_dict = checkpoint.get('state_dict', checkpoint)
    else:
        state_dict = checkpoint
    msg = model.load_state_dict(state_dict, strict=False)
    print(f"Loaded {model_name} weights with message: {msg}")

    model.eval()  # inference only: freeze dropout / norm statistics
    loaded_models[model_name] = model
    return model
61
+
62
# Eagerly load the default model at startup so the first request is fast
# (load_model also caches it in loaded_models; this binding itself is
# not referenced again below).
default_model = load_model(DEFAULT_MODEL)
64
+
65
def get_embedding(image, model_name=DEFAULT_MODEL):
    """Calculate a unit-normalized embedding for an image.

    Args:
        image: PIL Image or numpy array; None is tolerated.
        model_name: key into MODELS (unknown names use the default model).

    Returns:
        (embedding, message): embedding is a numpy array scaled to unit
        length, or None on failure (message then explains why).
    """
    if image is None:
        return None, "No image provided"

    try:
        # Get the (cached) model
        model = load_model(model_name)

        # Convert to PIL Image if it's not already
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Apply transformations
        transform = get_transform(model_name)
        img_tensor = transform(image).unsqueeze(0)  # add batch dimension

        # Get embedding
        with torch.no_grad():
            embedding = model(img_tensor)

        # Convert to numpy
        embedding_np = embedding.squeeze().cpu().numpy()

        # Normalize embedding to unit length. Guard against a zero vector:
        # the original divided unconditionally, which yields NaNs when the
        # norm is 0.
        norm = np.linalg.norm(embedding_np)
        if norm == 0:
            return None, "Error calculating embedding: zero-norm embedding"
        embedding_norm = embedding_np / norm

        return embedding_norm, f"Embedding calculated successfully using {model_name}"
    except Exception as e:
        # Broad catch is deliberate: surface failures as a UI status message
        # instead of crashing the Gradio app.
        return None, f"Error calculating embedding: {str(e)}"
95
+
96
def process_image(image, model_name=DEFAULT_MODEL):
    """Compute an image's embedding plus a bar-chart visualization.

    Returns (image, vis_path, message, embedding_list); all but the message
    are None when embedding extraction failed.
    """
    embedding, message = get_embedding(image, model_name)

    if embedding is None:
        return None, None, message, None

    # Visualize the first 100 embedding dimensions as a bar chart.
    # Imported lazily, matching the original, so app startup stays light.
    import matplotlib.pyplot as plt
    import tempfile

    fig = plt.figure(figsize=(10, 4))
    plt.bar(range(min(100, len(embedding))), embedding[:100])
    plt.title(f"Embedding Visualization ({model_name}, first 100 dimensions)")
    plt.xlabel("Dimension")
    plt.ylabel("Value")

    # Save to a unique temp file: the original's fixed "embedding_vis.png"
    # gets clobbered when two requests run concurrently.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        vis_path = tmp.name
    plt.savefig(vis_path)
    plt.close(fig)  # close the specific figure to avoid leaking memory

    # Return the processed image, embedding visualization, and message
    return image, vis_path, message, embedding.tolist()
118
+
119
# Define the Gradio interface
with gr.Blocks() as demo:
    # Page header
    gr.Markdown("# Image Embedding Calculator")
    gr.Markdown("Upload an image to calculate its embedding vector using a Vision Transformer model")

    with gr.Tab("Interactive Demo"):
        with gr.Row():
            with gr.Column():
                # Input side: image upload, model picker, trigger button
                input_image = gr.Image(type="pil", label="Input Image")
                model_dropdown = gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value=DEFAULT_MODEL,
                    label="Model"
                )
                submit_btn = gr.Button("Calculate Embedding")

            with gr.Column():
                # Output side: echoed image, plot, status text, raw vector
                output_image = gr.Image(type="pil", label="Processed Image")
                output_vis = gr.Image(type="filepath", label="Embedding Visualization")
                output_message = gr.Textbox(label="Status")
                output_embedding = gr.JSON(label="Embedding Vector")

        # Wire the button to the processing function
        submit_btn.click(
            fn=process_image,
            inputs=[input_image, model_dropdown],
            outputs=[output_image, output_vis, output_message, output_embedding]
        )

    with gr.Tab("API Documentation"):
        # Static usage docs. String content is kept at column 0 so Markdown
        # does not treat the indentation as a code block.
        gr.Markdown("""
## API Usage

This application provides an API endpoint for calculating image embeddings.

### Endpoint: `/api/predict`

**Method**: POST

**Input**:
- `image`: An image file
- `model_name`: (Optional) Name of the model to use (default: "mars-vit-b-0217")

**Output**:
```json
{
  "embedding": [...], // The embedding vector
  "message": "Status message",
  "model_name": "mars-vit-b-0217" // The model used
}
```

### Example using Python requests:
```python
import requests

response = requests.post(
    "https://yourusername-embedding-helper.hf.space/api/predict",
    files={"image": open("your_image.jpg", "rb")},
    data={"model_name": "mars-vit-b-0217"}
)

result = response.json()
embedding = result["embedding"]
```
""")
184
+
185
# Define API endpoint function
def api_predict(image, model_name=DEFAULT_MODEL):
    """Return a JSON-serializable dict with the image's embedding."""
    vector, message = get_embedding(image, model_name)
    payload = vector.tolist() if vector is not None else None
    return {"embedding": payload, "message": message, "model_name": model_name}
191
+
192
# Enable request queuing so concurrent users are served in order.
demo.queue()

# BUG FIX: the original did `demo = gr.mount_gradio_app(app=demo, blocks=demo,
# path="/")`, but mount_gradio_app expects a FastAPI application as `app`, not
# the Blocks itself — and rebinding `demo` to its return value would leave
# `demo.launch()` below called on an object without a launch() method.
# demo.launch() already serves the UI and the /api endpoints on its own.

# Launch the app
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ torchvision>=0.15.0
3
+ timm>=1.0.0
4
+ gradio
5
+ numpy<2.0.0
6
+ Pillow>=8.3.1
7
+ matplotlib>=3.5.0