aswin-raghavan committed on
Commit
f30f5b4
·
1 Parent(s): c32f3ac

add rolling annotation

Browse files
Files changed (1) hide show
  1. app.py +38 -16
app.py CHANGED
@@ -4,31 +4,53 @@ from transformers import pipeline
4
  import numpy as np
5
  from PIL import Image
6
  from transformers import CLIPProcessor, CLIPModel
 
 
7
  clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
8
  clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
9
 
10
  # pipe = pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch32")
11
  # images="dog.jpg"
12
 
13
def shot(image, labels_text):
    """Extract CLIP image embeddings for *image*.

    Parameters
    ----------
    image : array-like
        Raw image data (e.g. an HxWx3 numpy array from a Gradio image input).
    labels_text : str
        Comma-separated labels. Currently unused — the zero-shot pipeline
        that consumed it is commented out below; the parameter is kept so
        the Gradio interface signature is unchanged.

    Returns
    -------
    torch.Tensor
        The image embeddings produced by the CLIP model.
    """
    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
    # NOTE(review): the text prompts are hard-coded; labels_text is not
    # wired into them yet.
    inputs = clip_processor(text=["a photo of a cat", "a photo of a dog"],
                            images=PIL_image, return_tensors="pt", padding=True)
    outputs = clip_model(**inputs)
    return outputs.image_embeds
    # Previous zero-shot classification path, kept for reference:
    # res = pipe(images=PIL_image,
    #            candidate_labels=labels_text.split(","),
    #            hypothesis_template="This is a photo of a {}")
    # return {dic["label"]: dic["score"] for dic in res}
 
 
 
 
 
 
 
24
 
25
# Wire up the Gradio demo: an image plus a comma-separated label string go
# in, the raw text of the model output comes back.
iface = gr.Interface(
    shot,
    ["image", "text"],
    "text",
    examples=[
        ["dog.jpg", "dog,cat,bird,animal"],
        # ["germany.jpg", "germany,belgium,colombia"],
        ["colombia.jpg", "germany,belgium,colombia"],
    ],
    description="Add a picture and a list of labels separated by commas",
    title="CLIP feature extractor",
)

iface.launch()
 
 
 
 
 
 
 
 
 
 
4
  import numpy as np
5
  from PIL import Image
6
  from transformers import CLIPProcessor, CLIPModel
7
+ import pandas as pd
8
+
9
  clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
10
  clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
11
 
12
  # pipe = pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch32")
13
  # images="dog.jpg"
14
 
15
def extract_features(image):
    """Return CLIP image embeddings for *image*.

    Parameters
    ----------
    image : array-like
        Raw image data (e.g. an HxWx3 numpy array from a Gradio image input).

    Returns
    -------
    torch.Tensor
        The image embeddings from the CLIP model (shape approximately
        (1, embed_dim) for a single image — confirm against the model config).
    """
    PIL_image = Image.fromarray(np.uint8(image)).convert('RGB')
    # The text prompts are required by the processor but irrelevant here;
    # only the image branch of the output is used.
    inputs = clip_processor(text=["a photo of a cat", "a photo of a dog"],
                            images=PIL_image, return_tensors="pt", padding=True)
    outputs = clip_model(**inputs)
    # BUG FIX: `outputs` is a CLIPOutput (a ModelOutput dataclass) and has no
    # `.shape` attribute — the original `print(outputs.shape)` raised
    # AttributeError. Log the embedding tensor's shape instead.
    print(outputs.image_embeds.shape)
    return outputs.image_embeds
21
+
22
+
23
def update_table_up(img, df, state):
    """Record an "Up" (positive) annotation for the currently shown image.

    Parameters
    ----------
    img : array-like
        Pixel data of the image currently displayed.
    df : pandas.DataFrame
        The accumulated annotation table.
    state : list[str]
        Queue of image file names still to annotate; the first entry is
        the one currently displayed.

    Returns
    -------
    tuple
        (next image name or None, updated DataFrame, updated queue) —
        fed back into the image display, the table, and the state.
    """
    # BUG FIX: the display shows state[0] (see demo.load), so pop the FIRST
    # entry — `state.pop()` removed the LAST one, misaligning the recorded
    # name with the image actually on screen.
    img_name = state.pop(0)
    # BUG FIX: guard the empty queue instead of raising IndexError on state[0].
    next_img = state[0] if state else None
    img_embeds = extract_features(img)
    print(img_name, img.shape, img_embeds.shape)
    # BUG FIX: a dict of pure scalars without an index raises ValueError, and
    # a 2-D tensor as a column value is exploded row-wise. Build one explicit
    # row, storing the embedding as a single flat array cell.
    new_df = pd.DataFrame({
        'image_name': [img_name],
        'image_embed': [img_embeds.detach().numpy().ravel()],
        'label': [1],
    })
    print(new_df)
    df = pd.concat([df, new_df], ignore_index=True)
    return next_img, df, state
32
 
33
# Rolling-annotation UI: one training image is shown at a time; pressing
# "Up" stores its CLIP embedding with a positive label and advances the queue.
with gr.Blocks() as demo:
    # Queue of images still to annotate; the head of the list is displayed.
    train_images = gr.State(["dog.jpg", "colombia.jpg", "germany.jpg"])
    # start_button = gr.Button(label="Start")
    image_display = gr.Image()
    # text_display = gr.Text()
    with gr.Column():
        upvote = gr.Button("Up")
        downvote = gr.Button("down")
    annotated_samples = gr.Dataframe(headers=['image_name', 'image_embed', 'label'],
                                     row_count=(1, 'dynamic'),
                                     col_count=3, label='Annotations', wrap=True)
    upvote.click(update_table_up,
                 inputs=[image_display, annotated_samples, train_images],
                 outputs=[image_display, annotated_samples, train_images])
    # TODO: implement the negative-label handler.
    # downvote.click(update_table_down,
    #                inputs=[image_display, annotated_samples],
    #                outputs=[image_display, annotated_samples])
    # examples = gr.Examples(examples=[["dog.jpg"], ["colombia.jpg"]],
    #                        inputs=[image_display])

    # BUG FIX: `train_images` is a gr.State *component*, not a list — indexing
    # it at build time (`train_images[0]`) does not access its value. Pass the
    # state as an input so the load callback receives the actual list.
    demo.load(lambda imgs: imgs[0], inputs=[train_images], outputs=image_display)

demo.launch()