Spaces:

MLInAi
/

CartoonCaptionGen

Runtime error

MLInAi committed on Apr 1, 2024

Commit

65aba2a

verified ·

1 Parent(s): c17b215

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,10 +9,7 @@ model = BlipForConditionalGeneration.from_pretrained("MLInAi/CartoonCaptionGen")
 tokenizer = AutoTokenizer.from_pretrained("MLInAi/CartoonCaptionGen")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Fixed prompt
-FIXED_PROMPT = "Generate a funny caption from image"
-# Function to generate caption for the uploaded image with the fixed prompt
 def generate_caption(image):
     # Preprocess the image
     image = Image.open(image).convert("RGB")
@@ -25,20 +22,6 @@ def generate_caption(image):
     ])
     image_tensor = transform(image).unsqueeze(0).to(device)
-    # Prepend the fixed prompt to the input tensor
-    fixed_prompt_tensor = tokenizer(FIXED_PROMPT, return_tensors="pt").input_ids.to(device)
-    # Repeat the prompt tensor to match the batch size of the image tensor
-    batch_size = image_tensor.shape[0]
-    repeated_prompt_tensor = fixed_prompt_tensor.repeat(batch_size, 1)
-    # Reshape the image tensor to match the shape of the prompt tensor
-    # The reshaping depends on the model's input requirements
-    image_tensor = image_tensor.view(batch_size, -1)
-    # Concatenate the prompt tensor with the reshaped image tensor along the sequence dimension
-    input_tensor = torch.cat((repeated_prompt_tensor, image_tensor), dim=1)
     # Generate caption
     output = model.generate(pixel_values=image_tensor)
     caption = tokenizer.decode(output[0], skip_special_tokens=True)

 tokenizer = AutoTokenizer.from_pretrained("MLInAi/CartoonCaptionGen")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Function to generate caption for the uploaded image
 def generate_caption(image):
     # Preprocess the image
     image = Image.open(image).convert("RGB")
     ])
     image_tensor = transform(image).unsqueeze(0).to(device)
     # Generate caption
     output = model.generate(pixel_values=image_tensor)
     caption = tokenizer.decode(output[0], skip_special_tokens=True)