update app
app.py CHANGED

@@ -28,7 +28,7 @@ class OrangeRedTheme(Soft):
         self,
         *,
         primary_hue: colors.Color | str = colors.gray,
-        secondary_hue: colors.Color | str = colors.orange_red,
+        secondary_hue: colors.Color | str = colors.orange_red,
         neutral_hue: colors.Color | str = colors.slate,
         text_size: sizes.Size | str = sizes.text_lg,
         font: fonts.Font | str | Iterable[fonts.Font | str] = (

@@ -55,80 +55,87 @@ class OrangeRedTheme(Soft):
             button_primary_text_color_hover="white",
             button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
             button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
-            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
-            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
-            button_secondary_text_color="black",
-            button_secondary_text_color_hover="white",
-            button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
-            button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
-            button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
-            button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
-            slider_color="*secondary_500",
-            slider_color_dark="*secondary_600",
             block_title_text_weight="600",
-            block_border_width="3px",
             block_shadow="*shadow_drop_lg",
-            button_primary_shadow="*shadow_drop_lg",
-            button_large_padding="11px",
-            color_accent_soft="*primary_100",
-            block_label_background_fill="*primary_200",
         )

 orange_red_theme = OrangeRedTheme()

-model = AutoModel.from_pretrained(
+model = AutoModel.from_pretrained(
+    "facebook/metaclip-2-mt5-worldwide-s16",
+    torch_dtype=torch.bfloat16,
+    attn_implementation="sdpa"
+)
 processor = AutoProcessor.from_pretrained("facebook/metaclip-2-mt5-worldwide-s16")

 def postprocess_metaclip(probs, labels):
-    return output
+    return {labels[i]: probs[0][i].item() for i in range(len(labels))}

 def metaclip_detector(image, texts):
     inputs = processor(text=texts, images=image, return_tensors="pt", padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
-    probs = logits_per_image.softmax(dim=1)
+    probs = outputs.logits_per_image.softmax(dim=1)
     return probs

 def infer(image, candidate_labels):
-    candidate_labels = [
+    candidate_labels = [l.strip() for l in candidate_labels.split(",")]
     probs = metaclip_detector(image, candidate_labels)
     return postprocess_metaclip(probs, labels=candidate_labels)

-css="""
-#
-    margin: 0
+css = """
+#root, body, html {
+    margin: 0;
+    padding: 0;
+    height: 100%;
+}
+
+.center-container {
+    max-width: 900px;
+    margin: 0 auto !important;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+}
+
+#main-title h1 {
+    text-align: center !important;
+    width: 100%;
 }
-#main-title h1 {font-size: 2.1em !important;}
 """

 with gr.Blocks(css=css, theme=orange_red_theme) as demo:
-    gr.
-    "
-    with gr.
+    with gr.Column(elem_classes="center-container"):
+
+        gr.Markdown("# **MetaCLIP 2 Zero-Shot Classification**", elem_id="main-title")
+        gr.Markdown("This is the demo of MetaCLIP 2 for zero-shot classification.")
+
+        with gr.Row():
+            with gr.Column():
+                image_input = gr.Image(type="pil", label="Upload Image")
+                text_input = gr.Textbox(label="Input labels (comma separated)")
+                run_button = gr.Button("Run", variant="primary")
+            with gr.Column():
+                metaclip_output = gr.Label(
+                    label="MetaCLIP 2 Output",
+                    num_top_classes=3
+                )
+
+        gr.Examples(
+            examples=[
+                ["./baklava.jpg", "dessert on a plate, baklava"],
+                ["./cat.jpg", "a cat, two cats, three cats"],
+                ["./cat.jpg", "two sleeping cats, two cats playing, three cats laying down"],
+            ],
+            inputs=[image_input, text_input],
+            outputs=[metaclip_output],
+            fn=infer,
+        )
+
+        run_button.click(
+            fn=infer,
+            inputs=[image_input, text_input],
+            outputs=[metaclip_output]
+        )
+
 demo.launch()
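For context, the updated inference path in this commit can be exercised as a standalone script. The sketch below is a minimal reconstruction under the commit's own assumptions (same checkpoint, bfloat16 weights, SDPA attention, comma-separated label parsing); the local image path and label string are lifted from the commit's example rows and are illustrative placeholders, not files guaranteed to exist.

import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

# Same checkpoint and loading options as the commit: bfloat16 weights with
# PyTorch's scaled-dot-product attention to reduce memory use and latency.
ckpt = "facebook/metaclip-2-mt5-worldwide-s16"
model = AutoModel.from_pretrained(
    ckpt, torch_dtype=torch.bfloat16, attn_implementation="sdpa"
)
processor = AutoProcessor.from_pretrained(ckpt)

# Illustrative inputs: "./cat.jpg" is assumed to exist locally.
image = Image.open("./cat.jpg")
labels = [l.strip() for l in "a cat, two cats, three cats".split(",")]

inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model(**inputs)

# logits_per_image has shape (1, num_labels); softmax over dim=1 converts the
# image-text similarity scores into a distribution over the candidate labels.
probs = outputs.logits_per_image.softmax(dim=1)
print({labels[i]: probs[0][i].item() for i in range(len(labels))})

The {label: score} dict that postprocess_metaclip now returns is the mapping format gr.Label renders directly, with num_top_classes=3 limiting the display to the three highest-scoring labels.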