AmandaPanda committed on
Commit
d3f61c4
·
verified ·
1 Parent(s): f16a87f

Reorganize code (e.g., import tools and libraries at the start). Edit and tweak.

Browse files
Files changed (1) hide show
  1. app.py +34 -65
app.py CHANGED
@@ -1,90 +1,59 @@
 
1
  import gradio as gr
2
 
3
-
4
-
5
- # Load vision capability to support image display
6
- ##pip install datasets
7
-
8
- # Load pandas for grid display
9
- ##pip install pandas
10
  import pandas as pd
11
-
12
- # Load first 20 rows of dataset (merve/coco)
13
  from datasets import load_dataset
14
- dataset = load_dataset("merve/coco", split='train', stream=True)
15
-
16
- # Reduce dataset to 20 rows
17
- df = pd.dataset.iloc[0:19]
18
-
19
- print ("Print to show the 20 images available.")
20
- print ("The app will then select an image for further exploration.")
21
- print(df.head(20))
22
-
23
- # Use the sample command
24
- selected_image = df.sample(n=1)
25
-
26
-
27
- # Get url for image
28
- def parse_url(df):
29
- for index, row in df.iterrows():
30
- parsed = urlparse(str(row)).query # <- Notice the change here
31
- parsed = parse_qs(parsed)
32
- for k, v in parsed.items(): #use items() in Python3 and iteritems() in Python2
33
- df.loc[index, k.strip()] = v[0].strip().lower()
34
- return df
35
-
36
-
37
- image_url = parse_url(df['image'])
38
-
39
-
40
-
41
-
42
- print (selected_image)
43
 
 
 
 
44
 
 
 
45
 
46
- def greet(name):
47
- return "Hello " + name + "!!"
48
 
49
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
50
- demo.launch()
51
 
52
- import requests
53
- from PIL import Image
54
- from transformers import BlipProcessor, BlipForConditionalGeneration
55
 
 
56
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
57
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
58
 
 
 
59
 
60
- ##img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
61
- #Select image
62
 
63
- raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
 
64
 
65
- # conditional image captioning
66
- text = "a photography of"
67
- inputs = processor(raw_image, text, return_tensors="pt")
68
 
69
- out = model.generate(**inputs)
70
- print(processor.decode(out[0], skip_special_tokens=True))
71
 
72
- # unconditional image captioning
73
- inputs = processor(raw_image, return_tensors="pt")
74
 
75
- out = model.generate(**inputs)
76
- print(processor.decode(out[0], skip_special_tokens=True))
77
 
 
 
 
 
 
 
 
78
 
 
79
 
80
- # Get image database
81
- ##curl -X GET \
82
- ## "https://datasets-server.huggingface.co/first-rows?dataset=merve%2Fcoco&config=default&split=validation"
83
 
84
- # Load transformer Salesforce/blip image captioning
85
- # Load model directly
86
- ##from transformers import AutoProcessor, AutoModelForVision2Seq
87
 
88
- ##processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
89
- ##model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large")
90
 
 
1
+ # Import gradio - app framework
2
  import gradio as gr
3
 
4
+ # Import pandas datasets, transformers, torch
 
 
 
 
 
 
5
  import pandas as pd
 
 
6
  from datasets import load_dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ from transformers import BlipProcessor, BlipForConditionalGeneration
9
+ from PIL import Image
10
+ import torch
11
 
12
# Load the merve/coco dataset as a streaming (iterable) dataset so the whole
# dataset is never downloaded up front.
# FIX: the load_dataset keyword is `streaming=True` — `stream=True` is not a
# valid parameter and raises a TypeError.
dataset = load_dataset("merve/coco", split="train", streaming=True)

# Take the first 20 examples from the stream.
# FIX: the variable defined above is `dataset`; `dataset_stream` was an
# undefined name and raised a NameError.
samples = list(dataset.take(20))

# Convert the 20 sampled rows to a DataFrame for easy random selection later.
df = pd.DataFrame(samples)
20
 
21
+ ## print ("Print to show the 20 images available.")
22
+ ## print ("The app will then select an image for further exploration.")
23
+ ## print(df.head(20))
24
 
25
# Load the BLIP image-captioning processor and model once at import time,
# so each request only has to run inference (Salesforce/blip-image-captioning-large).
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
28
 
29
# Captioning function wired to the Gradio interface below.
def caption_random_image():
    """Pick a random image from the 20-row sample and caption it with BLIP.

    Returns:
        tuple: ``(image, caption)`` — ``image`` is the row's ``image`` field
        (assumed to be a PIL image for this dataset — TODO confirm for the
        streaming merve/coco rows) and ``caption`` is the decoded string
        produced by unconditional BLIP captioning.
    """
    # Pick one random row from the 20-row DataFrame.
    sample = df.sample(1).iloc[0]

    # 'image' field contains an actual PIL image
    image = sample["image"]

    # Unconditional image captioning (no text prompt supplied).
    inputs = processor(image, return_tensors="pt")

    # Inference only: disable autograd tracking to save memory and time.
    with torch.no_grad():
        out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    return image, caption
 
45
 
 
 
46
 
47
# Build the Gradio UI: no inputs — each submit returns a random COCO image
# together with its BLIP caption.
demo = gr.Interface(
    fn=caption_random_image,
    inputs=None,
    outputs=["image", "text"],
    title="Image Captioning",
    description="Pulls a random image from merve/coco and captions it using BLIP.",
)

demo.launch()
56
 
 
 
 
57
 
 
 
 
58
 
 
 
59