create-caption

Paused

nroggendorff committed on Nov 16, 2025

Commit

a04116c

verified ·

1 Parent(s): b5d09ec

Update train.py

Files changed (1) hide show

train.py CHANGED Viewed

@@ -40,34 +40,33 @@ def caption_batch(batch, processor, model):
             image = image.convert("RGB")
         pil_images.append(image)
-    text_inputs = []
-    for _ in pil_images:
         msg = [
             {
                 "role": "user",
                 "content": [
                     {"type": "text", "text": "Describe the image, and skip mentioning that it's illustrated or from anime."},
                 ],
             }
         ]
-        text_inputs.append(processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True))
     inputs = processor(
-        text=text_inputs,
         images=pil_images,
         return_tensors="pt",
         padding=True
     )
-    input_ids = inputs.input_ids.to(model.device)
-    attention_mask = inputs.attention_mask.to(model.device)
-    pixel_values = inputs.pixel_values.to(model.device)
     with torch.no_grad():
         generated = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            pixel_values=pixel_values,
             max_new_tokens=256,
         )

             image = image.convert("RGB")
         pil_images.append(image)
+    messages_list = []
+    for pil_image in pil_images:
         msg = [
             {
                 "role": "user",
                 "content": [
+                    {"type": "image"},
                     {"type": "text", "text": "Describe the image, and skip mentioning that it's illustrated or from anime."},
                 ],
             }
         ]
+        messages_list.append(msg)
+    texts = processor.apply_chat_template(messages_list, add_generation_prompt=True, tokenize=False)
     inputs = processor(
+        text=texts,
         images=pil_images,
         return_tensors="pt",
         padding=True
     )
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
     with torch.no_grad():
         generated = model.generate(
+            **inputs,
             max_new_tokens=256,
         )