anyonehomep1mane committed
Commit 5aa6736 · Parent(s): e344222
Code Changes
Browse files
- app.py +16 -117
- cat.jpg → assets/cat.jpg +0 -0
- fridge.jpg → assets/fridge.jpg +0 -0
- zebra.jpg → assets/zebra.jpg +0 -0
- config/__pycache__/settings.cpython-310.pyc +0 -0
- config/settings.py +8 -0
- core/__pycache__/inference.cpython-310.pyc +0 -0
- core/__pycache__/model_loader.cpython-310.pyc +0 -0
- core/inference.py +24 -0
- core/model_loader.py +15 -0
- ui/__pycache__/layout.cpython-310.pyc +0 -0
- ui/__pycache__/styles.cpython-310.pyc +0 -0
- ui/__pycache__/theme.cpython-310.pyc +0 -0
- ui/layout.py +43 -0
- ui/styles.py +16 -0
- ui/theme.py +28 -0
- utils/__pycache__/warnings.cpython-310.pyc +0 -0
- utils/warnings.py +4 -0
- version_one_app.py +125 -0
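
Taken together, the file list maps out the refactor: the single-file app is split into config, core, ui, and utils packages, the example images move under assets/, and the old script is kept as version_one_app.py. Reconstructed from the list above, the resulting layout is:

    app.py
    assets/
        cat.jpg
        fridge.jpg
        zebra.jpg
    config/
        settings.py
    core/
        inference.py
        model_loader.py
    ui/
        layout.py
        styles.py
        theme.py
    utils/
        warnings.py
    version_one_app.py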
app.py
CHANGED
@@ -1,125 +1,24 @@
-import torch
-from transformers import AutoModel, AutoProcessor
-import gradio as gr
-from PIL import Image
-from gradio.themes import Soft
-from gradio.themes.utils import colors, fonts, sizes
+from utils.warnings import suppress_warnings
+from core.model_loader import load_model
+from ui.theme import OrangeRedTheme
+from ui.styles import CSS_STYLE
+from ui.layout import build_ui

-import warnings
-warnings.filterwarnings(action="ignore")
+def main():
+    suppress_warnings()

-colors.orange_red = colors.Color(
-    name="orange_red",
-    c50="#FFF0E5", c100="#FFE0CC", c200="#FFC299", c300="#FFA366",
-    c400="#FF8533", c500="#FF4500", c600="#E63E00", c700="#CC3700",
-    c800="#B33000", c900="#992900", c950="#802200",
-)
+    model, processor = load_model()
+    theme = OrangeRedTheme()

-class OrangeRedTheme(Soft):
-    def __init__(self):
-        super().__init__(
-            primary_hue=colors.orange_red,
-            secondary_hue=colors.orange_red,
-            neutral_hue=colors.slate,
-            text_size=sizes.text_lg,
-            font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
-            font_mono=(fonts.GoogleFont("IBM Plex Mono"), "monospace"),
-        )
-        super().set(
-            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
-            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
-            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
-            button_primary_text_color="white",
-            block_border_width="3px",
-            block_shadow="*shadow_drop_lg",
-        )
-
-orange_red_theme = OrangeRedTheme()
-
-MODEL_ID = "openai/clip-vit-base-patch32"
-model = AutoModel.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,
-    attn_implementation="sdpa"
-)
-processor = AutoProcessor.from_pretrained(MODEL_ID)
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = model.to(device)
-
-def postprocess_metaclip(probs, labels):
-    return {labels[i]: probs[0][i].item() for i in range(len(labels))}
-
-def metaclip_detector(image, texts):
-    inputs = processor(text=texts, images=image, return_tensors="pt", padding=True)
-    with torch.no_grad():
-        outputs = model(**inputs)
-        probs = outputs.logits_per_image.softmax(dim=1)
-    return probs
-
-def infer(image, candidate_labels):
-    candidate_labels = [l.strip() for l in candidate_labels.split(",")]
-    probs = metaclip_detector(image, candidate_labels)
-    return postprocess_metaclip(probs, labels=candidate_labels)
-
-css_style = """
-#container {
-    max-width: 1280px; /* wider layout */
-    margin: auto;
-}
-
-@media (min-width: 1600px) {
-    #container {
-        max-width: 1440px;
-    }
-}
-
-#title h1 {
-    font-size: 2.4em !important;
-}
-"""
-
-with gr.Blocks(title="AI Document Summarizer") as demo:
-    with gr.Column(elem_id="container"):
-
-        gr.Markdown("# **Open AI Zero-Shot Classification**", elem_id="title")
-        gr.Markdown("This is the demo of model 'openai/clip-vit-base-patch32' for zero-shot classification.")
-
-        with gr.Row(equal_height=True):
-            with gr.Column():
-                image_input = gr.Image(type="pil", label="Upload Image", height=310)
-                text_input = gr.Textbox(label="Input labels (comma separated)")
-                run_button = gr.Button("Run", variant="primary")
-            with gr.Column():
-                metaclip_output = gr.Label(
-                    label="Open AI Zero-Shot Classification Output",
-                    num_top_classes=5
-                )
-
-        with gr.Row(equal_height=True):
-            gr.Examples(
-                examples=[
-                    ["./zebra.jpg", "a photo of a zebra, a photo of a horse, a photo of a donkey"],
-                    ["./cat.jpg", "a photo of a cat, a photo of two cats, a photo of three cats"],
-                    ["./fridge.jpg", "a photo of a fridge, a photo of a cupboard, a photo of a wardrobe"]
-                ],
-                inputs=[image_input, text_input],
-                outputs=[metaclip_output],
-                fn=infer,
-            )
-
-        run_button.click(
-            fn=infer,
-            inputs=[image_input, text_input],
-            outputs=[metaclip_output]
-        )
-
-if __name__ == "__main__":
+    demo = build_ui(model, processor)
     demo.queue().launch(
-        theme=orange_red_theme,
-        css=css_style,
+        theme=theme,
+        css=CSS_STYLE,
         show_error=True,
         server_name="0.0.0.0",
        server_port=7860,
         debug=True
-    )
+    )
+
+if __name__ == "__main__":
+    main()
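
For orientation, the new app.py only wires the refactored pieces together. Below is a minimal smoke-test sketch that exercises the same entry points without starting a server; the script itself is illustrative and not part of the commit, and it assumes it is run from the repository root.

    # Hypothetical smoke test for the refactored layout.
    from core.model_loader import load_model
    from ui.layout import build_ui

    model, processor = load_model()    # downloads openai/clip-vit-base-patch32 on first use
    demo = build_ui(model, processor)  # returns a gradio Blocks instance, not yet served
    print(type(demo).__name__)         # Blocks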
cat.jpg → assets/cat.jpg
RENAMED
File without changes

fridge.jpg → assets/fridge.jpg
RENAMED
File without changes

zebra.jpg → assets/zebra.jpg
RENAMED
File without changes
config/__pycache__/settings.cpython-310.pyc
ADDED
Binary file (361 Bytes).
config/settings.py
ADDED
@@ -0,0 +1,8 @@
+import torch
+
+MODEL_ID = "openai/clip-vit-base-patch32"
+
+TORCH_DTYPE = torch.bfloat16
+ATTN_IMPLEMENTATION = "sdpa"
+
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
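
As a quick sanity check, the settings module can be imported on its own (a sketch, assuming torch is installed; on a machine without a visible GPU, DEVICE resolves to "cpu"):

    # Illustrative check of the values exported by config/settings.py.
    from config.settings import MODEL_ID, TORCH_DTYPE, ATTN_IMPLEMENTATION, DEVICE

    print(MODEL_ID)             # openai/clip-vit-base-patch32
    print(TORCH_DTYPE)          # torch.bfloat16
    print(ATTN_IMPLEMENTATION)  # sdpa
    print(DEVICE)               # cuda or cpu, depending on torch.cuda.is_available()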
core/__pycache__/inference.cpython-310.pyc
ADDED
Binary file (1.3 kB).

core/__pycache__/model_loader.cpython-310.pyc
ADDED
Binary file (652 Bytes).
core/inference.py
ADDED
@@ -0,0 +1,24 @@
+import torch
+from config.settings import DEVICE
+
+def post_processed_probs(probs, labels):
+    return {labels[i]: probs[0][i].item() for i in range(len(labels))}
+
+def generate_ouput(model, processor, image, texts):
+    inputs = processor(
+        text=texts,
+        images=image,
+        return_tensors="pt",
+        padding=True
+    ).to(DEVICE)
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        probs = outputs.logits_per_image.softmax(dim=1)
+
+    return probs
+
+def infer(model, processor, image, candidate_labels):
+    labels = [l.strip() for l in candidate_labels.split(",")]
+    probs = generate_ouput(model, processor, image, labels)
+    return post_processed_probs(probs, labels)
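
A hedged usage sketch for the inference helpers above, assuming one of the bundled example images and the loader from core/model_loader.py:

    # Illustrative call path: load once, then classify against comma-separated labels.
    from PIL import Image
    from core.model_loader import load_model
    from core.inference import infer

    model, processor = load_model()
    image = Image.open("assets/cat.jpg")
    scores = infer(model, processor, image, "a photo of a cat, a photo of a dog")
    print(scores)                       # {label: probability, ...}, as consumed by gr.Label
    print(max(scores, key=scores.get))  # most likely label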
core/model_loader.py
ADDED
@@ -0,0 +1,15 @@
+import torch
+from transformers import AutoModel, AutoProcessor
+from config.settings import MODEL_ID, TORCH_DTYPE, ATTN_IMPLEMENTATION, DEVICE
+
+def load_model():
+    model = AutoModel.from_pretrained(
+        MODEL_ID,
+        torch_dtype=TORCH_DTYPE,
+        attn_implementation=ATTN_IMPLEMENTATION
+    )
+    model = model.to(DEVICE)
+    model.eval()
+
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    return model, processor
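
What load_model() hands back can be inspected in isolation (a sketch; for this checkpoint the transformers auto classes resolve to CLIP's model and processor):

    # Illustrative inspection of the loaded objects.
    from core.model_loader import load_model

    model, processor = load_model()
    print(type(model).__name__)      # CLIPModel for this checkpoint
    print(model.dtype)               # torch.bfloat16
    print(type(processor).__name__)  # CLIPProcessor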
ui/__pycache__/layout.cpython-310.pyc
ADDED
Binary file (1.91 kB).

ui/__pycache__/styles.cpython-310.pyc
ADDED
Binary file (367 Bytes).

ui/__pycache__/theme.cpython-310.pyc
ADDED
Binary file (1.5 kB).
ui/layout.py
ADDED
@@ -0,0 +1,43 @@
+import gradio as gr
+from core.inference import infer
+
+def build_ui(model, processor):
+    with gr.Blocks(title="AI Document Summarizer") as demo:
+        with gr.Column(elem_id="container"):
+            gr.Markdown("# **Open AI Zero-Shot Classification**", elem_id="title")
+            gr.Markdown(
+                "This is the demo of model **openai/clip-vit-base-patch32** "
+                "for zero-shot image classification."
+            )
+
+            with gr.Row(equal_height=True):
+                with gr.Column():
+                    image_input = gr.Image(type="pil", label="Upload Image", height=310)
+                    text_input = gr.Textbox(label="Input labels (comma separated)")
+                    run_button = gr.Button("Run", variant="primary")
+
+                with gr.Column():
+                    output = gr.Label(
+                        label="Open AI Zero-Shot Classification Output",
+                        num_top_classes=5
+                    )
+
+            with gr.Row(equal_height=True):
+                gr.Examples(
+                    examples=[
+                        ["./assets/zebra.jpg", "a photo of a zebra, a photo of a horse, a photo of a donkey"],
+                        ["./assets/cat.jpg", "a photo of a cat, a photo of two cats, a photo of three cats"],
+                        ["./assets/fridge.jpg", "a photo of a fridge, a photo of a cupboard, a photo of a wardrobe"]
+                    ],
+                    inputs=[image_input, text_input],
+                    outputs=[output],
+                    fn=lambda img, txt: infer(model, processor, img, txt)
+                )
+
+            run_button.click(
+                fn=lambda img, txt: infer(model, processor, img, txt),
+                inputs=[image_input, text_input],
+                outputs=[output]
+            )
+
+    return demo
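
Note that build_ui() threads model and processor into the event handlers through lambdas, so gradio only ever sees a two-argument (image, text) callable. The same closure can be exercised outside the UI; the blank stand-in image below is hypothetical:

    # Illustrative: the handler shape gradio invokes on each Run click.
    from PIL import Image
    from core.model_loader import load_model
    from core.inference import infer

    model, processor = load_model()
    handler = lambda img, txt: infer(model, processor, img, txt)  # same closure as in build_ui

    dummy = Image.new("RGB", (224, 224), "white")  # hypothetical stand-in image
    print(handler(dummy, "a photo of a zebra, a blank white image"))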
ui/styles.py
ADDED
@@ -0,0 +1,16 @@
+CSS_STYLE = """
+#container {
+    max-width: 1280px;
+    margin: auto;
+}
+
+@media (min-width: 1600px) {
+    #container {
+        max-width: 1440px;
+    }
+}
+
+#title h1 {
+    font-size: 2.4em !important;
+}
+"""
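
CSS_STYLE is plain CSS keyed to the elem_id values used in ui/layout.py. A minimal sketch of previewing it on its own; gr.Blocks also accepts css= at construction time:

    # Illustrative: #container and #title match the elem_id values in ui/layout.py.
    import gradio as gr
    from ui.styles import CSS_STYLE

    with gr.Blocks(css=CSS_STYLE) as demo:
        with gr.Column(elem_id="container"):
            gr.Markdown("# Styled title", elem_id="title")

    # demo.launch()  # uncomment to preview locally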
ui/theme.py
ADDED
@@ -0,0 +1,28 @@
+from gradio.themes import Soft
+from gradio.themes.utils import colors, fonts, sizes
+
+colors.orange_red = colors.Color(
+    name="orange_red",
+    c50="#FFF0E5", c100="#FFE0CC", c200="#FFC299", c300="#FFA366",
+    c400="#FF8533", c500="#FF4500", c600="#E63E00", c700="#CC3700",
+    c800="#B33000", c900="#992900", c950="#802200",
+)
+
+class OrangeRedTheme(Soft):
+    def __init__(self):
+        super().__init__(
+            primary_hue=colors.orange_red,
+            secondary_hue=colors.orange_red,
+            neutral_hue=colors.slate,
+            text_size=sizes.text_lg,
+            font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
+            font_mono=(fonts.GoogleFont("IBM Plex Mono"), "monospace"),
+        )
+        super().set(
+            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
+            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
+            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
+            button_primary_text_color="white",
+            block_border_width="3px",
+            block_shadow="*shadow_drop_lg",
+        )
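
The theme can be previewed in isolation; a sketch that attaches OrangeRedTheme at Blocks construction time (gr.Blocks accepts theme= directly):

    # Illustrative theme preview.
    import gradio as gr
    from ui.theme import OrangeRedTheme

    with gr.Blocks(theme=OrangeRedTheme()) as demo:
        gr.Button("Primary action", variant="primary")  # rendered with the orange-red gradient

    # demo.launch()  # uncomment to preview locally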
utils/__pycache__/warnings.cpython-310.pyc
ADDED
Binary file (351 Bytes).
utils/warnings.py
ADDED
@@ -0,0 +1,4 @@
+import warnings
+
+def suppress_warnings():
+    warnings.filterwarnings(action="ignore")
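
suppress_warnings() installs a blanket "ignore" filter through the standard library warnings module; a minimal demonstration:

    # Illustrative: after the call, warnings are silenced process-wide.
    import warnings
    from utils.warnings import suppress_warnings

    suppress_warnings()
    warnings.warn("this is filtered out and never printed")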
version_one_app.py
ADDED
@@ -0,0 +1,125 @@
+import torch
+from transformers import AutoModel, AutoProcessor
+import gradio as gr
+from PIL import Image
+from gradio.themes import Soft
+from gradio.themes.utils import colors, fonts, sizes
+
+import warnings
+warnings.filterwarnings(action="ignore")
+
+colors.orange_red = colors.Color(
+    name="orange_red",
+    c50="#FFF0E5", c100="#FFE0CC", c200="#FFC299", c300="#FFA366",
+    c400="#FF8533", c500="#FF4500", c600="#E63E00", c700="#CC3700",
+    c800="#B33000", c900="#992900", c950="#802200",
+)
+
+class OrangeRedTheme(Soft):
+    def __init__(self):
+        super().__init__(
+            primary_hue=colors.orange_red,
+            secondary_hue=colors.orange_red,
+            neutral_hue=colors.slate,
+            text_size=sizes.text_lg,
+            font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
+            font_mono=(fonts.GoogleFont("IBM Plex Mono"), "monospace"),
+        )
+        super().set(
+            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
+            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
+            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
+            button_primary_text_color="white",
+            block_border_width="3px",
+            block_shadow="*shadow_drop_lg",
+        )
+
+orange_red_theme = OrangeRedTheme()
+
+MODEL_ID = "openai/clip-vit-base-patch32"
+model = AutoModel.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="sdpa"
+)
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+
+def postprocess_metaclip(probs, labels):
+    return {labels[i]: probs[0][i].item() for i in range(len(labels))}
+
+def metaclip_detector(image, texts):
+    inputs = processor(text=texts, images=image, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+        probs = outputs.logits_per_image.softmax(dim=1)
+    return probs
+
+def infer(image, candidate_labels):
+    candidate_labels = [l.strip() for l in candidate_labels.split(",")]
+    probs = metaclip_detector(image, candidate_labels)
+    return postprocess_metaclip(probs, labels=candidate_labels)
+
+css_style = """
+#container {
+    max-width: 1280px; /* wider layout */
+    margin: auto;
+}
+
+@media (min-width: 1600px) {
+    #container {
+        max-width: 1440px;
+    }
+}
+
+#title h1 {
+    font-size: 2.4em !important;
+}
+"""
+
+with gr.Blocks(title="AI Document Summarizer") as demo:
+    with gr.Column(elem_id="container"):
+
+        gr.Markdown("# **Open AI Zero-Shot Classification**", elem_id="title")
+        gr.Markdown("This is the demo of model 'openai/clip-vit-base-patch32' for zero-shot classification.")
+
+        with gr.Row(equal_height=True):
+            with gr.Column():
+                image_input = gr.Image(type="pil", label="Upload Image", height=310)
+                text_input = gr.Textbox(label="Input labels (comma separated)")
+                run_button = gr.Button("Run", variant="primary")
+            with gr.Column():
+                metaclip_output = gr.Label(
+                    label="Open AI Zero-Shot Classification Output",
+                    num_top_classes=5
+                )
+
+        with gr.Row(equal_height=True):
+            gr.Examples(
+                examples=[
+                    ["./zebra.jpg", "a photo of a zebra, a photo of a horse, a photo of a donkey"],
+                    ["./cat.jpg", "a photo of a cat, a photo of two cats, a photo of three cats"],
+                    ["./fridge.jpg", "a photo of a fridge, a photo of a cupboard, a photo of a wardrobe"]
+                ],
+                inputs=[image_input, text_input],
+                outputs=[metaclip_output],
+                fn=infer,
+            )
+
+        run_button.click(
+            fn=infer,
+            inputs=[image_input, text_input],
+            outputs=[metaclip_output]
+        )
+
+if __name__ == "__main__":
+    demo.queue().launch(
+        theme=orange_red_theme,
+        css=css_style,
+        show_error=True,
+        server_name="0.0.0.0",
+        server_port=7860,
+        debug=True
+    )