Spaces:

wilwork
/

KC

Sleeping

App Files Files Community

wilwork committed on Mar 3, 2025

Commit

1d65703

verified ·

1 Parent(s): cf16f32

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -47

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import gradio as gr
 from transformers import AutoModel
 from PIL import Image
 import torch
-import numpy as np
 # Load JinaAI CLIP model
 model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
@@ -14,54 +13,36 @@ def compute_similarity(input1_type, input1_text, input1_image, input2_type, inpu
     - Image-Image
     - Text-Image & Image-Text
     """
-    # Determine input types
-    if input1_type == "Text":
-        input1 = input1_text.strip()
-        input1_is_text = bool(input1)
-        input1_is_image = False
-    else:
-        input1 = input1_image
-        input1_is_text = False
-        input1_is_image = input1 is not None
-    if input2_type == "Text":
-        input2 = input2_text.strip()
-        input2_is_text = bool(input2)
-        input2_is_image = False
-    else:
-        input2 = input2_image
-        input2_is_text = False
-        input2_is_image = input2 is not None
-    # Ensure valid input
-    if not (input1_is_text or input1_is_image) or not (input2_is_text or input2_is_image):
-        return "Error: Please provide valid inputs (text or image) for both fields!"
     try:
         with torch.no_grad():
-            if input1_is_text and input2_is_text:
                 # Text-Text Similarity
-                emb1 = model.encode_text([input1])
-                emb2 = model.encode_text([input2])
-            elif input1_is_image and input2_is_image:
                 # Image-Image Similarity
-                image1 = Image.fromarray(input1)
-                image2 = Image.fromarray(input2)
-                emb1 = model.encode_image([image1])
-                emb2 = model.encode_image([image2])
             else:
-                # Image-Text Similarity
-                if input1_is_image:
-                    image = Image.fromarray(input1)
-                    text = input2
-                    emb1 = model.encode_image([image])
-                    emb2 = model.encode_text([text])
                 else:
-                    image = Image.fromarray(input2)
-                    text = input1
-                    emb1 = model.encode_text([text])
-                    emb2 = model.encode_image([image])
             # Compute cosine similarity
             similarity_score = (emb1 @ emb2.T).item()
@@ -74,7 +55,7 @@ def compute_similarity(input1_type, input1_text, input1_image, input2_type, inpu
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# JinaAI CLIP Multimodal Similarity")
-    gr.Markdown("Compare similarity between two inputs: **Text-Text, Image-Image, or Image-Text**.")
     with gr.Row():
         input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
@@ -90,10 +71,10 @@ with gr.Blocks() as demo:
     def update_visibility(input1_type, input2_type):
         return (
-            input1_type == "Text",  # Input 1 text visibility
-            input1_type == "Image", # Input 1 image visibility
-            input2_type == "Text",  # Input 2 text visibility
-            input2_type == "Image"  # Input 2 image visibility
         )
     input1_type.change(update_visibility, inputs=[input1_type, input2_type], outputs=[input1_text, input1_image, input2_text, input2_image])

 from transformers import AutoModel
 from PIL import Image
 import torch
 # Load JinaAI CLIP model
 model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
     - Image-Image
     - Text-Image & Image-Text
     """
+    # Validate inputs
+    if input1_type == "Text" and not input1_text.strip():
+        return "Error: Input 1 is empty!"
+    if input1_type == "Image" and input1_image is None:
+        return "Error: Please upload an image for Input 1!"
+    if input2_type == "Text" and not input2_text.strip():
+        return "Error: Input 2 is empty!"
+    if input2_type == "Image" and input2_image is None:
+        return "Error: Please upload an image for Input 2!"
     try:
         with torch.no_grad():
+            if input1_type == "Text" and input2_type == "Text":
                 # Text-Text Similarity
+                emb1 = model.encode_text([input1_text])
+                emb2 = model.encode_text([input2_text])
+            elif input1_type == "Image" and input2_type == "Image":
                 # Image-Image Similarity
+                emb1 = model.encode_image([Image.fromarray(input1_image)])
+                emb2 = model.encode_image([Image.fromarray(input2_image)])
             else:
+                # Image-Text Similarity (either order)
+                if input1_type == "Image":
+                    emb1 = model.encode_image([Image.fromarray(input1_image)])
+                    emb2 = model.encode_text([input2_text])
                 else:
+                    emb1 = model.encode_text([input1_text])
+                    emb2 = model.encode_image([Image.fromarray(input2_image)])
             # Compute cosine similarity
             similarity_score = (emb1 @ emb2.T).item()
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# JinaAI CLIP Multimodal Similarity")
+    gr.Markdown("Compare similarity between **Text-Text, Image-Image, or Image-Text**.")
     with gr.Row():
         input1_type = gr.Radio(["Text", "Image"], label="Input 1 Type", value="Text")
     def update_visibility(input1_type, input2_type):
         return (
+            input1_type == "Text",  # Input 1 text visible
+            input1_type == "Image", # Input 1 image visible
+            input2_type == "Text",  # Input 2 text visible
+            input2_type == "Image"  # Input 2 image visible
         )
     input1_type.change(update_visibility, inputs=[input1_type, input2_type], outputs=[input1_text, input1_image, input2_text, input2_image])