Spaces:

Hasani
/

Binary-Image-Classification-In-The-Wild

Runtime error

App Files Files Community

IbrahimHasani committed on Sep 8, 2023

Commit

dab8972

1 Parent(s): 93fe568

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -14

app.py CHANGED Viewed

@@ -6,36 +6,39 @@ import gradio as gr
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-def image_similarity(image: Image.Image, positive_prompt: str, negative_prompt: str):
     inputs = processor(
-        text=[positive_prompt, negative_prompt],
-        images=image,
-        return_tensors="pt",
         padding=True
     )
     outputs = model(**inputs)
-    logits_per_image = outputs.logits_per_image  # image-text similarity score
-    probs = logits_per_image.softmax(dim=1)  # take the softmax to get the label probabilities
-    # Determine if positive prompt has a higher probability than the negative prompt
-    result = probs[0][0] > probs[0][1]
-    return bool(result), f"Probabilities: Positive {probs[0][0]:.4f}, Negative {probs[0][1]:.4f}"
 interface = gr.Interface(
     fn=image_similarity,
     inputs=[
         gr.components.Image(type="pil"),
-        gr.components.Text(label="Enter positive prompt e.g. 'a smiling face'"),
-        gr.components.Text(label="Enter negative prompt e.g. 'a sad face'")
     ],
     outputs=[
         gr.components.Textbox(label="Result"),
-        gr.components.Textbox(label="Probabilities")
     ],
     title="Engagify's Image Action Detection",
-    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in a image or not. (Binaray Classifier). It contrasts an Action against a negative label. Ensure the prompts accurately describe the desired detection.",
     live=False,
     theme=gr.themes.Monochrome(),

 # Load the CLIP preprocessor and model once at module level; both come from the
 # "openai/clip-vit-base-patch32" checkpoint and are used by image_similarity below.
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
def image_similarity(image: Image.Image, positive_prompt: str, negative_prompts: "str | list[str]"):
    """Decide whether the positive prompt fits the image better than every negative prompt.

    Args:
        image: PIL image to classify.
        positive_prompt: Text describing the action to detect.
        negative_prompts: Contrasting descriptions, given either as one
            semicolon-separated string (the form a single text box supplies)
            or as an already-split list of strings.

    Returns:
        A ``(is_positive, message)`` tuple. ``is_positive`` is True when the
        positive prompt receives a strictly higher softmax probability than
        each negative prompt; ``message`` reports the positive prompt's
        probability to four decimals.

    Raises:
        ValueError: If no non-empty negative prompt is supplied.
    """
    # The UI gathers all negatives in one text box, so they arrive as a single
    # string; concatenating a string onto a list raised TypeError on every call.
    if isinstance(negative_prompts, str):
        negatives = [part.strip() for part in negative_prompts.split(";") if part.strip()]
    else:
        negatives = list(negative_prompts)
    if not negatives:
        # Without a contrast class there is nothing to compare against.
        raise ValueError("Provide at least one negative prompt, separated by semicolons.")

    prompts = [positive_prompt, *negatives]
    inputs = processor(
        text=prompts,
        images=image,
        return_tensors="pt",
        padding=True,
    )
    outputs = model(**inputs)
    # Row 0 is the single input image; column 0 is the positive prompt.
    probs = outputs.logits_per_image.softmax(dim=1)[0]
    positive_prob = probs[0]
    is_positive_highest = positive_prob > probs[1:].max()
    return bool(is_positive_highest), f"Probability for Positive Prompt: {positive_prob:.4f}"
 interface = gr.Interface(
     fn=image_similarity,
     inputs=[
         gr.components.Image(type="pil"),
+        gr.components.Text(label="Enter positive prompt e.g. 'a person drinking a beverage'"),
+        gr.components.Textbox(label="Enter negative prompts, separated by semicolon e.g. 'an empty scene; person without beverage'", placeholder="negative prompt 1; negative prompt 2; ..."),
     ],
     outputs=[
         gr.components.Textbox(label="Result"),
+        gr.components.Textbox(label="Probability for Positive Prompt")
     ],
     title="Engagify's Image Action Detection",
+    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in an image or not. (Binary Classifier). It contrasts an Action against multiple negative labels. Ensure the prompts accurately describe the desired detection.",
     live=False,
     theme=gr.themes.Monochrome(),