Save the JSON output files per GPU, naming each file by local rank
Browse files- tools/annotate.py +6 -4
tools/annotate.py
CHANGED
|
@@ -105,6 +105,7 @@ def worker(model, processor, dataset, args, output_dir):
|
|
| 105 |
batch_size = 1
|
| 106 |
data_loader = DataLoader(sub_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=DataCollatorForSupervisedDataset(processor, args.data_path))
|
| 107 |
labels = []
|
|
|
|
| 108 |
for batch_tensors, result_meta in tqdm(data_loader):
|
| 109 |
|
| 110 |
input_ids = batch_tensors['input_ids'].cuda()
|
|
@@ -139,11 +140,12 @@ def worker(model, processor, dataset, args, output_dir):
|
|
| 139 |
'action_labels': meta['hoi_obj']['action_labels'],
|
| 140 |
'description': output,
|
| 141 |
})
|
|
|
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
output_path =
|
| 146 |
-
|
| 147 |
|
| 148 |
def eval_model(args):
|
| 149 |
torch.distributed.init_process_group(backend='nccl')
|
|
|
|
| 105 |
batch_size = 1
|
| 106 |
data_loader = DataLoader(sub_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=DataCollatorForSupervisedDataset(processor, args.data_path))
|
| 107 |
labels = []
|
| 108 |
+
|
| 109 |
for batch_tensors, result_meta in tqdm(data_loader):
|
| 110 |
|
| 111 |
input_ids = batch_tensors['input_ids'].cuda()
|
|
|
|
| 140 |
'action_labels': meta['hoi_obj']['action_labels'],
|
| 141 |
'description': output,
|
| 142 |
})
|
| 143 |
+
|
| 144 |
|
| 145 |
+
local_rank = int(os.environ.get("LOCAL_RANK", "0"))
|
| 146 |
+
output_path = os.path.join(args.output_dir, f'labels_{local_rank}.json')
|
| 147 |
+
with open(output_path, "w", encoding="utf-8") as f:
|
| 148 |
+
json.dump(labels, f, ensure_ascii=False, indent=2)
|
| 149 |
|
| 150 |
def eval_model(args):
|
| 151 |
torch.distributed.init_process_group(backend='nccl')
|