Spaces · Runtime error
chats-bug committed · Commit b8b6ade · 1 Parent(s): 245a3fa
Blip Base testing
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig
+from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, Blip2ForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig, BlipProcessor
 import torch
 import open_clip
 
@@ -16,17 +16,17 @@ device_map = {
 }
 
 # Load the Blip2 model
-preprocessor_blip2_8_bit = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
-model_blip2_8_bit = Blip2ForConditionalGeneration.from_pretrained(
-    "Salesforce/blip2-opt-2.7b",
-    device_map="auto",
-    quantization_config=quantization_config,
-    load_in_8bit=True
-)
+# preprocessor_blip2_8_bit = BlipProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
+# model_blip2_8_bit = Blip2ForConditionalGeneration.from_pretrained(
+#     "Salesforce/blip2-opt-2.7b",
+#     device_map="auto",
+#     quantization_config=quantization_config,
+#     load_in_8bit=True
+# )
 
 # Load the Blip base model
-
-
+preprocessor_blip_base = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model_blip_base = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
 # # Load the Blip large model
 # preprocessor_blip_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
@@ -44,8 +44,8 @@ model_blip2_8_bit = Blip2ForConditionalGeneration.from_pretrained(
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Transfer the models to the device
-model_blip2_8_bit.to(device)
-
+# model_blip2_8_bit.to(device)
+model_blip_base.to(device)
 # model_blip_large.to(device)
 # model_git_large_coco.to(device)
 # model_oc_coca.to(device)
@@ -84,22 +84,21 @@ def generate_caption(
     if use_float_16:
         inputs = inputs.to(torch.float16)
 
-    # generated_ids = model.generate(
-    #     pixel_values=inputs.pixel_values,
-    #     # attention_mask=inputs.attention_mask,
-    #     max_length=64,
-    # )
+    generated_ids = model.generate(
+        pixel_values=inputs.pixel_values,
+        # attention_mask=inputs.attention_mask,
+        max_length=64,
+    )
 
-    # if tokenizer is None:
-    #     generated_caption = preprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    # else:
-    #     generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    if tokenizer is None:
+        generated_caption = preprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    else:
+        generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
-    generated_ids = model.generate(**inputs, max_new_tokens=32)
-    generated_text = preprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+    # generated_ids = model.generate(**inputs, max_new_tokens=32)
+    # generated_text = preprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
 
-    return generated_text
+    return generated_caption
 
 
 def generate_captions_clip(
@@ -149,10 +148,10 @@ def generate_captions(
         The generated caption.
     """
     # Generate captions for the image using the Blip2 model
-    caption_blip2_8_bit = generate_caption(preprocessor_blip2_8_bit, model_blip2_8_bit, image, use_float_16=True).strip()
+    # caption_blip2_8_bit = generate_caption(preprocessor_blip2_8_bit, model_blip2_8_bit, image, use_float_16=True).strip()
 
     # Generate captions for the image using the Blip base model
-
+    caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
 
     # # Generate captions for the image using the Blip large model
     # caption_blip_large = generate_caption(preprocessor_blip_large, model_blip_large, image).strip()
@@ -163,7 +162,7 @@ def generate_captions(
     # # Generate captions for the image using the CLIP model
     # caption_oc_coca = generate_captions_clip(model_oc_coca, transform_oc_coca, image).strip()
 
-    return caption_blip2_8_bit
+    return caption_blip_base
 
 
 # Create the interface
@@ -177,8 +176,8 @@ iface = gr.Interface(
     ],
     # Define the outputs
    outputs=[
-        gr.outputs.Textbox(label="Blip2 8-bit"),
-
+        # gr.outputs.Textbox(label="Blip2 8-bit"),
+        gr.outputs.Textbox(label="Blip base"),
         # gr.outputs.Textbox(label="Blip large"),
         # gr.outputs.Textbox(label="GIT large coco"),
         # gr.outputs.Textbox(label="CLIP"),
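
For reference, the captioning path this commit switches the Space to (BLIP base in place of the 8-bit BLIP-2 model) boils down to the following minimal sketch. It assumes transformers, torch, and Pillow are installed; the image file name is a hypothetical stand-in, and the generation setting mirrors the max_length=64 used in app.py.

# Minimal sketch of the BLIP-base captioning path enabled by this commit.
# "example.jpg" is a hypothetical input image, not a file in this repo.
import torch
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration

preprocessor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

image = Image.open("example.jpg").convert("RGB")
inputs = preprocessor(images=image, return_tensors="pt").to(device)

# Same generation setting as generate_caption() in app.py.
generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=64)
caption = preprocessor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(caption)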
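One caveat worth flagging: gr.outputs.Textbox belongs to the legacy Gradio output namespace, which was removed in Gradio 4.x, so this interface only runs on older Gradio releases. A hedged sketch of the equivalent on current Gradio follows, assuming a single image input for the Space (the inputs list is not shown in this diff):

import gradio as gr

# Hypothetical Gradio 4.x port: components are passed directly instead of
# through gr.outputs. The gr.Image input is an assumption; generate_captions
# is the function defined in app.py above.
iface = gr.Interface(
    fn=generate_captions,
    inputs=gr.Image(type="pil", label="Input image"),
    outputs=[gr.Textbox(label="Blip base")],
)
iface.launch()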