muneebashraf committed on
Commit
a74e498
·
1 Parent(s): 5b986f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -16
app.py CHANGED
@@ -3,28 +3,24 @@ import requests
3
  from PIL import Image
4
  from transformers import BlipProcessor, BlipForConditionalGeneration
5
 
 
6
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
7
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
8
 
9
- def caption_image(input_image):
10
- raw_image = Image.open(input_image).convert('RGB')
 
 
11
 
12
- # Conditional image captioning
13
- text = "a photography of"
14
- inputs = processor(raw_image, text, return_tensors="pt")
15
- out = model.generate(**inputs)
16
- caption_conditional = processor.decode(out[0], skip_special_tokens=True)
17
-
18
- # Unconditional image captioning
19
  inputs = processor(raw_image, return_tensors="pt")
20
  out = model.generate(**inputs)
21
- caption_unconditional = processor.decode(out[0], skip_special_tokens=True)
22
 
23
- return f"Conditional Caption: {caption_conditional}\nUnconditional Caption: {caption_unconditional}"
24
 
25
- # Define the Gradio input interface
26
- inputs = gr.inputs.Image()
27
- output = gr.outputs.Textbox()
28
 
29
- # Launch the Gradio app
30
- gr.Interface(fn=caption_image, inputs=inputs, outputs=output, live=True).launch()
 
3
  from PIL import Image
4
  from transformers import BlipProcessor, BlipForConditionalGeneration
5
 
6
# Load the BLIP captioning processor and model once at import time so the
# (large) checkpoint download happens before the app starts serving requests.
_MODEL_ID = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(_MODEL_ID)
model = BlipForConditionalGeneration.from_pretrained(_MODEL_ID)
9
 
10
# Define function for generating captions
def generate_caption(image):
    """Generate an unconditional caption for an image with the BLIP model.

    Parameters
    ----------
    image : PIL.Image.Image, numpy-like array, str, or file-like
        The picture to caption. Gradio's ``gr.inputs.Image()`` typically
        delivers a numpy array; a filesystem path or open file object is
        also accepted for direct/programmatic use.

    Returns
    -------
    str
        The decoded caption with special tokens removed.
    """
    # Normalise the input to an RGB PIL image. The original code called
    # Image.open() unconditionally, which raises on the numpy arrays that
    # Gradio's image input delivers by default (NOTE(review): default input
    # type assumed to be "numpy" — confirm against the installed Gradio
    # version) and on already-open PIL images.
    if isinstance(image, Image.Image):
        raw_image = image.convert("RGB")
    elif hasattr(image, "__array_interface__"):
        # numpy-like array -> PIL image
        raw_image = Image.fromarray(image).convert("RGB")
    else:
        # Path string or file-like object.
        raw_image = Image.open(image).convert("RGB")

    # Unconditional captioning: no text prompt is supplied to the processor.
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
21
 
22
# Build the Gradio UI: an image upload in, a text box with the caption out,
# and start the app server.
gr.Interface(
    fn=generate_caption,
    inputs=gr.inputs.Image(),
    outputs=gr.outputs.Textbox(),
).launch()