aswin-raghavan committed on
Commit
c32f3ac
·
1 Parent(s): ca4b933

CLIP feature extractor

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -3,26 +3,32 @@ import gradio as gr
3
  from transformers import pipeline
4
  import numpy as np
5
  from PIL import Image
 
 
 
6
 
7
-
8
- pipe = pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch32")
9
- images="dog.jpg"
10
 
11
  def shot(image, labels_text):
12
  PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
13
  labels = labels_text.split(",")
14
- res = pipe(images=PIL_image,
15
- candidate_labels=labels,
16
- hypothesis_template= "This is a photo of a {}")
17
- return {dic["label"]: dic["score"] for dic in res}
 
 
 
 
18
 
19
  iface = gr.Interface(shot,
20
  ["image", "text"],
21
- "label",
22
  examples=[["dog.jpg", "dog,cat,bird,animal"],
23
  # ["germany.jpg", "germany,belgium,colombia"],
24
  ["colombia.jpg", "germany,belgium,colombia"]],
25
  description="Add a picture and a list of labels separated by commas",
26
- title="Zero-shot Image Classification")
27
 
28
  iface.launch()
 
3
  from transformers import pipeline
4
  import numpy as np
5
  from PIL import Image
6
+ from transformers import CLIPProcessor, CLIPModel
7
+ clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
8
+ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
9
 
10
+ # pipe = pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch32")
11
+ # images="dog.jpg"
 
12
 
13
  def shot(image, labels_text):
14
  PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
15
  labels = labels_text.split(",")
16
+ inputs = clip_processor(text=["a photo of a cat", "a photo of a dog"], images=PIL_image, return_tensors="pt", padding=True)
17
+ outputs = clip_model(**inputs)
18
+ print(outputs)
19
+ return outputs.image_embeds
20
+ # res = pipe(images=PIL_image,
21
+ # candidate_labels=labels,
22
+ # hypothesis_template= "This is a photo of a {}")
23
+ # return {dic["label"]: dic["score"] for dic in res}
24
 
25
  iface = gr.Interface(shot,
26
  ["image", "text"],
27
+ "text",
28
  examples=[["dog.jpg", "dog,cat,bird,animal"],
29
  # ["germany.jpg", "germany,belgium,colombia"],
30
  ["colombia.jpg", "germany,belgium,colombia"]],
31
  description="Add a picture and a list of labels separated by commas",
32
+ title="CLIP feature extractor")
33
 
34
  iface.launch()