Spaces:

MLInAi
/

CartoonCaptionGen

Runtime error

MLInAi committed on Apr 1, 2024

Commit

b59ed0f

verified ·

1 Parent(s): e8d2c0e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Fixed prompt
 FIXED_PROMPT = "Generate a funny caption from image"
 # Function to generate caption for the uploaded image with the fixed prompt
 def generate_caption(image):
     # Preprocess the image
@@ -27,13 +28,20 @@ def generate_caption(image):
     # Prepend the fixed prompt to the input tensor
     fixed_prompt_tensor = tokenizer(FIXED_PROMPT, return_tensors="pt").input_ids.to(device)
-    input_tensor = torch.cat((fixed_prompt_tensor, image_tensor), dim=1)
     # Generate caption
     output = model.generate(pixel_values=image_tensor)
     caption = tokenizer.decode(output[0], skip_special_tokens=True)
     return caption
 # Streamlit app
 st.title("Image Caption Generator")

 # Fixed prompt
 FIXED_PROMPT = "Generate a funny caption from image"
+# Function to generate caption for the uploaded image with the fixed prompt
 # Function to generate caption for the uploaded image with the fixed prompt
 def generate_caption(image):
     # Preprocess the image
     # Prepend the fixed prompt to the input tensor
     fixed_prompt_tensor = tokenizer(FIXED_PROMPT, return_tensors="pt").input_ids.to(device)
+    # Repeat the prompt tensor to match the batch size of the image tensor
+    batch_size = image_tensor.shape[0]
+    repeated_prompt_tensor = fixed_prompt_tensor.repeat(batch_size, 1)
+    # Concatenate the prompt tensor with the image tensor along the sequence dimension
+    input_tensor = torch.cat((repeated_prompt_tensor, image_tensor), dim=1)
     # Generate caption
     output = model.generate(pixel_values=image_tensor)
     caption = tokenizer.decode(output[0], skip_special_tokens=True)
     return caption
 # Streamlit app
 st.title("Image Caption Generator")