create-caption

Paused

nroggendorff committed on Nov 16, 2025

Commit

748af01

verified ·

1 Parent(s): 337a414

Update train.py

Files changed (1) hide show

train.py CHANGED Viewed

@@ -29,7 +29,11 @@ def load_model(model_name, device_id=0):
     return processor, model
 def caption_batch(batch, processor, model):
     images = batch["image"]
     pil_images = []
@@ -84,6 +88,10 @@ def caption_batch(batch, processor, model):
         d = d.strip()
         captions.append(d)
     return {
         "image": images,
         "text": captions,
@@ -93,6 +101,9 @@ def caption_batch(batch, processor, model):
 def process_shard_worker(
     gpu_id, start, end, model_name, batch_size, input_dataset, output_file
 ):
     torch.cuda.set_device(gpu_id)
     print(f"[GPU {gpu_id}] Loading model...", flush=True)

     return processor, model
+processed_count = 0
 def caption_batch(batch, processor, model):
+    global processed_count
     images = batch["image"]
     pil_images = []
         d = d.strip()
         captions.append(d)
+    processed_count += len(images)
+    if processed_count > 100:
+        print(f"Processed {processed_count} examples so far...")
     return {
         "image": images,
         "text": captions,
 def process_shard_worker(
     gpu_id, start, end, model_name, batch_size, input_dataset, output_file
 ):
+    global processed_count
+    processed_count = 0
     torch.cuda.set_device(gpu_id)
     print(f"[GPU {gpu_id}] Loading model...", flush=True)