Spaces:

ptschandl
/

zero_shot_classification_pmc

Sleeping

App Files Files Community

ptschandl committed on Sep 16, 2025

Commit

065ca40

verified ·

1 Parent(s): 00f18b0

Create app.py

Browse files

Files changed (1) hide show

app.py +62 -0

app.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import torch
+import gradio as gr
+from PIL import Image
+from urllib.request import urlopen
+from open_clip import create_model_from_pretrained, get_tokenizer
+# Load the model and tokenizer from the Hugging Face Hub
+model, preprocess = create_model_from_pretrained('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
+tokenizer = get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')
+# Zero-shot image classification
+template = 'this is a photo of '
+# Device configuration
+device = torch.device('mps') if torch.mps.is_available() else torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+model.to(device)
+model.eval()
+def classify_image(image, candidate_labels):
+    # Convert candidate_labels string to a list
+    labels = [label.strip() for label in candidate_labels.split(",")]
+    context_length = 256
+    # Preprocess the image
+    image_input = preprocess(image).unsqueeze(0).to(device)
+    # Tokenize the candidate labels
+    texts = tokenizer([template + label for label in labels], context_length=context_length).to(device)
+    # Perform inference
+    with torch.no_grad():
+        image_features, text_features, logit_scale = model(image_input, texts)
+        logits = (logit_scale * image_features @ text_features.t()).detach().softmax(dim=-1)
+        sorted_indices = torch.argsort(logits, dim=-1, descending=True)
+        logits = logits.cpu().numpy()
+        sorted_indices = sorted_indices.cpu().numpy()
+    # Prepare the results
+    results = []
+    for j in range(len(labels)):
+        jth_index = sorted_indices[0][j]
+        results.append({
+            "label": labels[jth_index],
+            "score": float(logits[0][jth_index])
+        })
+    return results
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=classify_image,
+    inputs=[
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(lines=2, placeholder="Enter candidate labels, separated by commas..."),
+    ],
+    outputs=gr.JSON(),
+    title="Zero-Shot Image Classification",
+    description="Upload an image and enter candidate labels to classify the image."
+)
+# Launch the interface
+iface.launch()