chats-bug committed
Commit · 7295a68
1 Parent(s): fbee9c4
More testing
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig
 import torch
 import open_clip
+from PIL import Image
+import requests
 
 from huggingface_hub import hf_hub_download
 
@@ -60,13 +62,13 @@ def generate_caption(
         The generated caption.
     """
     inputs = preprocessor(image, return_tensors="pt").to(device)
+    pixel_values = preprocessor(images=image, return_tensors="pt").pixel_values
 
     if use_float_16:
         inputs = inputs.to(torch.float16)
 
     generated_ids = model.generate(
-        pixel_values=
-        # attention_mask=inputs.attention_mask,
+        pixel_values=pixel_values,
         max_length=64,
     )
 
@@ -113,7 +115,8 @@ def generate_captions_clip(
 def generate_captions(
     image,
     max_length,
-    temperature
+    temperature,
+    use_sample_image,
 ):
     """
     Generate captions for the given image.
@@ -133,6 +136,10 @@ def generate_captions(
     caption_git_large_coco = ""
     caption_oc_coca = ""
 
+    if use_sample_image:
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(url, stream=True).raw)
+
     # Generate captions for the image using the Blip base model
     try:
         caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
@@ -168,6 +175,7 @@ iface = gr.Interface(
         gr.inputs.Image(label="Image"),
         gr.inputs.Slider(minimum=16, maximum=64, step=2, default=32, label="Max Length"),
         gr.inputs.Slider(minimum=0.5, maximum=1.5, step=0.1, default=1.0, label="Temperature"),
+        gr.inputs.Checkbox(default=False, type="bool", label="Use example image")
     ],
     # Define the outputs
     outputs=[
@@ -182,4 +190,4 @@ iface = gr.Interface(
 )
 
 # Launch the interface
-iface.launch()
+iface.launch(debug=True)
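For context, a minimal, self-contained sketch of the caption path this commit repairs. The checkpoint name Salesforce/blip-image-captioning-base is an assumption for illustration; the diff only shows that a BLIP base processor/model pair (preprocessor_blip_base, model_blip_base) exists in app.py, not which checkpoint it loads.

# Minimal sketch of the repaired generate_caption flow.
# Assumption: the Space's BLIP base pair is Salesforce/blip-image-captioning-base;
# the actual checkpoint is not visible in this diff.
import requests
import torch
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)

# The same COCO sample image the commit wires up behind the new checkbox.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# The fix: build pixel_values with the processor and pass it to generate(),
# replacing the incomplete `pixel_values=` keyword removed by this commit.
pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
generated_ids = model.generate(pixel_values=pixel_values, max_length=64)
caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(caption)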