cfoli committed on
Commit
3e94470
·
verified ·
1 Parent(s): 5168a3d

Upload zero_shot_classification.py

Browse files
Files changed (1) hide show
  1. zero_shot_classification.py +153 -0
zero_shot_classification.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """zero-shot-classification.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1dme6a-Yhl1xYbXobqu56YnjKyr0q9VbL
8
+
9
+ ### Summary
10
+
11
+ ---
12
+
13
+ These models perform zero-shot classification of images of "homographs" (words that could be associated with different meanings/objects/concepts, without any changes in spelling or pronunciation)
14
+ ###### *** 02.2026
15
+
16
+ ### Load dependencies
17
+
18
+ ---
19
+ """
20
+
21
+ import torch
22
+ from transformers import pipeline
23
+ import os
24
+ import gradio
25
+
26
+ """### Initialize containers
27
+
28
+ ---
29
+
30
+
31
+ """
32
+
33
+ MODEL_CACHE = {}
34
+
35
+ MODEL_OPTIONS = {
36
+ "CLIP-base": "openai/clip-vit-base-patch32",
37
+ "CLIP-large": "openai/clip-vit-large-patch14",
38
+ "SigLIP-base": "google/siglip-base-patch16-224",
39
+ "SigLIP-large": "google/siglip-so400m-patch14-384"}
40
+
41
+ CANDIDATE_LABELS = ["a bat (baseball)", "a bat (mammal)",
42
+ "a flower (plant)", "flour (baking powder)",
43
+ "a mouse (mammal)", "a mouse (electronic)",
44
+ "a nail (human finger)", "a nail (metal)",
45
+ "a nut (fruit seed)", "a nut (metal)"]
46
+
47
+ LABELS_MAP = ["Bat (baseball)", "Bat (mammal)",
48
+ "Flower (plant)", "Flour (baking powder)",
49
+ "Mouse (mammal)", "Mouse (electronic)",
50
+ "Nail (human finger)", "Nail (metal)",
51
+ "Nut (fruit seed)", "Nut (metal)"]
52
+
53
+ """### Select, load and run pre-trained zero-shot multi-modal model
54
+
55
+ ---
56
+
57
+
58
+ """
59
+
60
+ model_key = "CLIP-large"
61
+
62
+ # Load model (cache for speed)
63
+ if model_key not in MODEL_CACHE:
64
+ MODEL_CACHE[model_key] = pipeline(task = "zero-shot-image-classification",
65
+ model = MODEL_OPTIONS[model_key])
66
+ classifier = MODEL_CACHE[model_key]
67
+
68
+ BASE_DIR = '/content/drive/MyDrive/ML Projects/Zero-shot Image Classification/Images'
69
+ image_path = os.path.join(BASE_DIR, 'Mouse1_2.png')
70
+
71
+ output = classifier(
72
+ image = image_path,
73
+ candidate_labels = CANDIDATE_LABELS,
74
+ hypothesis_template = "This image shows {}")
75
+
76
+ print("\n\n=============================================================================")
77
+ print(f"\nPrediction: This image shows {output[0]["label"]} | Confidence (probability): {100*output[0]["score"]: .1f}%")
78
+
79
+ def run_classifer(model_key, image_path, prob_threshold = None):
80
+ # model_key: name of backbone zero-shot-image-classification model to use
81
+ # image_path: path to test image
82
+ # prob_threshold: confidence (i.e., probability) threshold above which to consider a prediction valid
83
+
84
+ device = 0 if torch.cuda.is_available() else -1
85
+
86
+ # Load model (cache for speed)
87
+ if model_key not in MODEL_CACHE:
88
+ MODEL_CACHE[model_key] = pipeline(task = "zero-shot-image-classification",
89
+ model = MODEL_OPTIONS[model_key],
90
+ device = device)
91
+ classifier = MODEL_CACHE[model_key]
92
+
93
+ outputs = classifier(
94
+ image = image_path,
95
+ candidate_labels = CANDIDATE_LABELS,
96
+ hypothesis_template = "This image shows {}")
97
+
98
+ # label_str = f"This image shows {output[0]["label"]}",
99
+ # prob_str = f"{100*output[0]["score"]: .1f}%"
100
+
101
+ label_lookup = dict(zip(CANDIDATE_LABELS, LABELS_MAP))
102
+
103
+ # Dictionary mapping all candidate labels to their predicted probabilities
104
+ prob_dict = {label_lookup[output['label']]: round(output["score"], 4) for output in outputs}
105
+
106
+ predicted_label_str = f"This image shows {outputs[0]['label']} | Confidence (probability): {100*outputs[0]['score']:.1f}%" if outputs[0]['score'] > prob_threshold else "No prediction"
107
+
108
+ return predicted_label_str, prob_dict
109
+
110
+ # example run
111
+ model_key = "CLIP-large"
112
+ BASE_DIR = '/content/drive/MyDrive/ML Projects/Zero-shot Image Classification/Images'
113
+ image_path = os.path.join(BASE_DIR, 'Nail2_1.png')
114
+
115
+ predicted_label_str, prob_dict = run_classifer(model_key, image_path, prob_threshold = 0.4)
116
+ print("\n\n=============================================================================")
117
+ # print(f"\nPrediction: {predicted_label_str} | Confidence (probability): {100*output[0]['score']:.1f}%")
118
+ print(f"\nPrediction: {predicted_label_str}")
119
+
120
+ prob_dict
121
+
122
+ """### Gradio App
123
+
124
+ ---
125
+
126
+
127
+ """
128
+
129
+ example_list_dir = os.path.join(os.getcwd(), "Example images")
130
+ example_list_img_names = os.listdir(example_list_dir)
131
+
132
+ example_list = [
133
+ ["CLIP-large", os.path.join(os.getcwd(), example_img)]
134
+ for example_img in example_list_img_names
135
+ if example_img.lower().endswith(".png")]
136
+
137
+ gradio_app = gradio.Interface(
138
+ fn = run_classifer,
139
+ inputs = [gradio.Dropdown(["CLIP-base", "CLIP-large", "SigLIP-base", "SigLIP-large"], value="CLIP-large", label = "Select Classifier"),
140
+ gradio.Image(type="pil", label="Load sample image here"),
141
+ gradio.Slider(minimum = 0.1, maximum = 0.9, step = 0.05, value = 0.25, label = "Set Prediction Threshold")
142
+ ],
143
+
144
+ outputs = [gradio.Textbox(label="Image Classification"),
145
+ gradio.Label(label="Prediction Probabilities", show_label=False)],
146
+
147
+ examples = example_list,
148
+ cache_examples = True,
149
+ title = "ChestVision",
150
+ description = "Multi-modal models for zero-shot classification of images of homophones and homographs",
151
+ article = "Author: C. Foli (02.2026) | Website: coming soon...")
152
+
153
+ gradio_app.launch()