Spaces:

909ahmed
/

CLIP

Sleeping

909ahmed committed on Jul 31, 2024

Commit

f2be81d

verified ·

1 Parent(s): 2fc3055

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,10 +8,11 @@ model, preprocess = clip.load("ViT-B/32", device=device)
 def process_image_and_text(image, text):
-    text_list = text.tolist()
     image = preprocess(image).unsqueeze(0).to(device)
-    text_tokens = clip.tokenize(text_list).to(device)
     with torch.no_grad():
         image_features = model.encode_image(image)
@@ -20,7 +21,7 @@ def process_image_and_text(image, text):
         logits_per_image, logits_per_text = model(image, text_tokens)
         probs = logits_per_image.softmax(dim=-1)
-    return probs
-demo = gr.Interface(fn=process_image_and_text, inputs=[gr.inputs.Image(type="pil"), gr.inputs.Textbox()], outputs="text")
-demo.launch()

 def process_image_and_text(image, text):
+    text = text.split(",")
+    image = Image.fromarray(image)
     image = preprocess(image).unsqueeze(0).to(device)
+    text_tokens = clip.tokenize(text).to(device)
     with torch.no_grad():
         image_features = model.encode_image(image)
         logits_per_image, logits_per_text = model(image, text_tokens)
         probs = logits_per_image.softmax(dim=-1)
+    return probs.cpu().numpy()[0]
+demo = gr.Interface(fn=process_image_and_text, inputs=['image', 'text'], outputs="text")
+demo.launch()