tejeshbhalla committed on
Commit
90aed4a
·
verified ·
1 Parent(s): e090cc5

Training in progress, step 10

Browse files
adapter_config.json CHANGED
@@ -25,11 +25,11 @@
25
  "target_modules": [
26
  "k_proj",
27
  "gate_proj",
 
 
28
  "up_proj",
29
- "down_proj",
30
  "v_proj",
31
- "q_proj",
32
- "o_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
25
  "target_modules": [
26
  "k_proj",
27
  "gate_proj",
28
+ "o_proj",
29
+ "q_proj",
30
  "up_proj",
 
31
  "v_proj",
32
+ "down_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:755497d3e79899a1080a522d163d8e27692bd91bc89410f83e45e4f8fa1f795c
3
  size 6627156248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d05fc15dabd7abb19233eb4f243c91c5572426a29ca8049df3db628a3458c26
3
  size 6627156248
args.json CHANGED
@@ -72,7 +72,7 @@
72
  "custom_register_path": [],
73
  "ignore_args_error": false,
74
  "use_swift_lora": false,
75
- "output_dir": "/root/dataDisk/llama-new/output/v8-20250126-122947",
76
  "overwrite_output_dir": false,
77
  "do_train": false,
78
  "do_eval": false,
@@ -83,32 +83,32 @@
83
  "per_device_eval_batch_size": 1,
84
  "per_gpu_train_batch_size": null,
85
  "per_gpu_eval_batch_size": null,
86
- "gradient_accumulation_steps": 16,
87
  "eval_accumulation_steps": null,
88
  "eval_delay": 0,
89
  "torch_empty_cache_steps": null,
90
- "learning_rate": 5e-05,
91
  "weight_decay": 0.01,
92
  "adam_beta1": 0.9,
93
  "adam_beta2": 0.999,
94
  "adam_epsilon": 1e-08,
95
  "max_grad_norm": 1.0,
96
- "num_train_epochs": 3.0,
97
  "max_steps": -1,
98
  "lr_scheduler_type": "linear",
99
  "lr_scheduler_kwargs": null,
100
- "warmup_ratio": 0.1,
101
  "warmup_steps": 0,
102
  "log_level": "passive",
103
  "log_level_replica": "warning",
104
  "log_on_each_node": true,
105
- "logging_dir": "/root/dataDisk/llama-new/output/v8-20250126-122947/runs",
106
  "logging_strategy": "steps",
107
  "logging_first_step": true,
108
  "logging_steps": 1,
109
  "logging_nan_inf_filter": true,
110
  "save_strategy": "steps",
111
- "save_steps": 100.0,
112
  "save_total_limit": 2,
113
  "save_safetensors": true,
114
  "save_on_each_node": false,
@@ -132,7 +132,7 @@
132
  "tpu_metrics_debug": false,
133
  "debug": null,
134
  "dataloader_drop_last": false,
135
- "eval_steps": 100.0,
136
  "dataloader_num_workers": 0,
137
  "dataloader_prefetch_factor": null,
138
  "past_index": -1,
@@ -366,8 +366,8 @@
366
  "local_world_size": 8,
367
  "model_suffix": "v3-20250124-205715-merged",
368
  "model_info": "ModelInfo(model_type='llama3_2', model_dir='/root/highspeedstorage/ft-volume/v3-20250124-205715-merged', torch_dtype=torch.bfloat16, max_model_len=8192, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)",
369
- "model_meta": "ModelMeta(model_type='llama3_2', model_groups=[ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.2-1B', hf_model_id='meta-llama/Llama-3.2-1B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B', hf_model_id='meta-llama/Llama-3.2-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-1B-Instruct', hf_model_id='meta-llama/Llama-3.2-1B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B-Instruct', hf_model_id='meta-llama/Llama-3.2-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.3-70B-Instruct', hf_model_id='meta-llama/Llama-3.3-70B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', hf_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='llama3_2', get_function=<function get_model_tokenizer_with_flash_attn at 0x7fec77a7b2e0>, model_arch='llama', architectures=['LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=['transformers>=4.45'], tags=[])",
370
  "model_dir": "/root/highspeedstorage/ft-volume/v3-20250124-205715-merged",
371
  "hub": "<class 'swift.hub.hub.HFHub'>",
372
- "training_args": "ORPOConfig(output_dir='/root/dataDisk/llama-new/output/v8-20250126-122947', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=16, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=5e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, lr_scheduler_kwargs=None, warmup_ratio=0.1, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/root/dataDisk/llama-new/output/v8-20250126-122947/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=100, save_total_limit=2, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=100, dataloader_num_workers=0, dataloader_prefetch_factor=None, past_index=-1, run_name='/root/dataDisk/llama-new/output/v8-20250126-122947', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 
'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'cpu', 'pin_memory': True}, 'offload_param': {'device': 'cpu', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['wandb'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=True, resume_from_checkpoint=None, hub_model_id='TheAgenticAI/LLAMA-3.3-70B-Reasoning', hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=True, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs={'use_reentrant': True}, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', 
push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, max_length=22000, max_prompt_length=None, max_completion_length=None, beta=0.1, disable_dropout=True, label_pad_token_id=None, padding_value=None, truncation_mode='keep_end', generate_during_eval=False, is_encoder_decoder=False, model_init_kwargs=None, dataset_num_proc=1, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)"
373
  }
 
72
  "custom_register_path": [],
73
  "ignore_args_error": false,
74
  "use_swift_lora": false,
75
+ "output_dir": "/root/dataDisk/llama-new/output/v12-20250126-143619",
76
  "overwrite_output_dir": false,
77
  "do_train": false,
78
  "do_eval": false,
 
83
  "per_device_eval_batch_size": 1,
84
  "per_gpu_train_batch_size": null,
85
  "per_gpu_eval_batch_size": null,
86
+ "gradient_accumulation_steps": 4,
87
  "eval_accumulation_steps": null,
88
  "eval_delay": 0,
89
  "torch_empty_cache_steps": null,
90
+ "learning_rate": 0.0003,
91
  "weight_decay": 0.01,
92
  "adam_beta1": 0.9,
93
  "adam_beta2": 0.999,
94
  "adam_epsilon": 1e-08,
95
  "max_grad_norm": 1.0,
96
+ "num_train_epochs": 5.0,
97
  "max_steps": -1,
98
  "lr_scheduler_type": "linear",
99
  "lr_scheduler_kwargs": null,
100
+ "warmup_ratio": 0.05,
101
  "warmup_steps": 0,
102
  "log_level": "passive",
103
  "log_level_replica": "warning",
104
  "log_on_each_node": true,
105
+ "logging_dir": "/root/dataDisk/llama-new/output/v12-20250126-143619/runs",
106
  "logging_strategy": "steps",
107
  "logging_first_step": true,
108
  "logging_steps": 1,
109
  "logging_nan_inf_filter": true,
110
  "save_strategy": "steps",
111
+ "save_steps": 10.0,
112
  "save_total_limit": 2,
113
  "save_safetensors": true,
114
  "save_on_each_node": false,
 
132
  "tpu_metrics_debug": false,
133
  "debug": null,
134
  "dataloader_drop_last": false,
135
+ "eval_steps": 10.0,
136
  "dataloader_num_workers": 0,
137
  "dataloader_prefetch_factor": null,
138
  "past_index": -1,
 
366
  "local_world_size": 8,
367
  "model_suffix": "v3-20250124-205715-merged",
368
  "model_info": "ModelInfo(model_type='llama3_2', model_dir='/root/highspeedstorage/ft-volume/v3-20250124-205715-merged', torch_dtype=torch.bfloat16, max_model_len=8192, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)",
369
+ "model_meta": "ModelMeta(model_type='llama3_2', model_groups=[ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.2-1B', hf_model_id='meta-llama/Llama-3.2-1B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B', hf_model_id='meta-llama/Llama-3.2-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-1B-Instruct', hf_model_id='meta-llama/Llama-3.2-1B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B-Instruct', hf_model_id='meta-llama/Llama-3.2-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.3-70B-Instruct', hf_model_id='meta-llama/Llama-3.3-70B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', hf_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='llama3_2', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f896b3972e0>, model_arch='llama', architectures=['LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=['transformers>=4.45'], tags=[])",
370
  "model_dir": "/root/highspeedstorage/ft-volume/v3-20250124-205715-merged",
371
  "hub": "<class 'swift.hub.hub.HFHub'>",
372
+ "training_args": "ORPOConfig(output_dir='/root/dataDisk/llama-new/output/v12-20250126-143619', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=4, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0003, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=5.0, max_steps=-1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/root/dataDisk/llama-new/output/v12-20250126-143619/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=10, save_total_limit=2, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=0, dataloader_prefetch_factor=None, past_index=-1, run_name='/root/dataDisk/llama-new/output/v12-20250126-143619', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 
'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'cpu', 'pin_memory': True}, 'offload_param': {'device': 'cpu', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['wandb'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=True, resume_from_checkpoint=None, hub_model_id='TheAgenticAI/LLAMA-3.3-70B-Reasoning', hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=True, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs={'use_reentrant': True}, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', 
push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, max_length=22000, max_prompt_length=None, max_completion_length=None, beta=0.1, disable_dropout=True, label_pad_token_id=None, padding_value=None, truncation_mode='keep_end', generate_during_eval=False, is_encoder_decoder=False, model_init_kwargs=None, dataset_num_proc=1, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)"
373
  }
logging.jsonl CHANGED
@@ -1,8 +1,12 @@
1
- {"loss": 0.96603394, "grad_norm": 0.09358346, "learning_rate": 5e-05, "memory(GiB)": 98.18, "train_speed(iter/s)": 0.00256, "rewards/chosen": -0.08843994, "rewards/rejected": -0.03957367, "rewards/accuracies": 0.0, "rewards/margins": -0.04885864, "logps/rejected": -0.3956604, "logps/chosen": -0.88378906, "logits/rejected": -0.18650818, "logits/chosen": -0.44784546, "nll_loss": 0.88244629, "log_odds_ratio": -1.39306641, "log_odds_chosen": -1.03417969, "epoch": 0.45714286, "global_step/max_steps": "1/6", "percentage": "16.67%", "elapsed_time": "6m 15s", "remaining_time": "31m 19s"}
2
- {"loss": 1.07710266, "grad_norm": 0.07397991, "learning_rate": 4e-05, "memory(GiB)": 120.78, "train_speed(iter/s)": 0.00248, "rewards/chosen": -0.10545349, "rewards/rejected": -0.04308319, "rewards/accuracies": 0.0625, "rewards/margins": -0.06238556, "logps/rejected": -0.4307251, "logps/chosen": -1.0546875, "logits/rejected": -0.21848679, "logits/chosen": -0.51165771, "nll_loss": 1.05371094, "log_odds_ratio": -1.47875977, "log_odds_chosen": -1.08789062, "epoch": 0.91428571, "global_step/max_steps": "2/6", "percentage": "33.33%", "elapsed_time": "13m 11s", "remaining_time": "26m 23s"}
3
- {"loss": 0.95780945, "grad_norm": 0.10560518, "learning_rate": 3e-05, "memory(GiB)": 120.78, "train_speed(iter/s)": 0.002382, "rewards/chosen": -0.06476508, "rewards/rejected": -0.04706489, "rewards/accuracies": 0.16666667, "rewards/margins": -0.0177002, "logps/rejected": -0.47081164, "logps/chosen": -0.64735246, "logits/rejected": -0.47363281, "logits/chosen": -0.76638454, "nll_loss": 0.64691842, "log_odds_ratio": -0.9453125, "log_odds_chosen": -0.42122397, "epoch": 1.45714286, "global_step/max_steps": "3/6", "percentage": "50.00%", "elapsed_time": "20m 44s", "remaining_time": "20m 44s"}
4
- {"loss": 0.9289856, "grad_norm": 0.03600949, "learning_rate": 2e-05, "memory(GiB)": 120.78, "train_speed(iter/s)": 0.002416, "rewards/chosen": -0.08105469, "rewards/rejected": -0.04862213, "rewards/accuracies": 0.0625, "rewards/margins": -0.03243256, "logps/rejected": -0.48632812, "logps/chosen": -0.81054688, "logits/rejected": -0.47827148, "logits/chosen": -0.81054688, "nll_loss": 0.81054688, "log_odds_ratio": -1.11035156, "log_odds_chosen": -0.64709473, "epoch": 1.91428571, "global_step/max_steps": "4/6", "percentage": "66.67%", "elapsed_time": "27m 20s", "remaining_time": "13m 40s"}
5
- {"loss": 0.94372559, "grad_norm": 0.03234995, "learning_rate": 1e-05, "memory(GiB)": 120.78, "train_speed(iter/s)": 0.002419, "rewards/chosen": -0.06709798, "rewards/rejected": -0.04815674, "rewards/accuracies": 0.16666667, "rewards/margins": -0.01894803, "logps/rejected": -0.48166233, "logps/chosen": -0.67122394, "logits/rejected": -0.45073783, "logits/chosen": -0.82421875, "nll_loss": 0.6715495, "log_odds_ratio": -0.96115452, "log_odds_chosen": -0.44466147, "epoch": 2.45714286, "global_step/max_steps": "5/6", "percentage": "83.33%", "elapsed_time": "34m 12s", "remaining_time": "6m 50s"}
6
- {"loss": 0.78184509, "grad_norm": 0.02222378, "learning_rate": 0.0, "memory(GiB)": 120.78, "train_speed(iter/s)": 0.002446, "rewards/chosen": -0.07427979, "rewards/rejected": -0.04429626, "rewards/accuracies": 0.0625, "rewards/margins": -0.0300293, "logps/rejected": -0.44299316, "logps/chosen": -0.74291992, "logits/rejected": -0.43395996, "logits/chosen": -0.81304932, "nll_loss": 0.74328613, "log_odds_ratio": -1.11694336, "log_odds_chosen": -0.65979004, "epoch": 2.91428571, "global_step/max_steps": "6/6", "percentage": "100.00%", "elapsed_time": "40m 38s", "remaining_time": "0s"}
7
- {"eval_loss": 1.1171875, "eval_runtime": 5.8635, "eval_samples_per_second": 0.341, "eval_steps_per_second": 0.171, "eval_rewards/chosen": -0.06933594, "eval_rewards/rejected": -0.04882812, "eval_rewards/accuracies": 0.0, "eval_rewards/margins": -0.02050781, "eval_logps/rejected": -0.48828125, "eval_logps/chosen": -0.69140625, "eval_logits/rejected": -0.58203125, "eval_logits/chosen": -0.59375, "eval_nll_loss": 0.6875, "eval_log_odds_ratio": -0.94921875, "eval_log_odds_chosen": -0.4609375, "epoch": 2.91428571, "global_step/max_steps": "6/6", "percentage": "100.00%", "elapsed_time": "40m 43s", "remaining_time": "0s"}
8
- {"train_runtime": 2471.1137, "train_samples_per_second": 0.333, "train_steps_per_second": 0.002, "total_flos": 65359310749696.0, "train_loss": 0.94258372, "epoch": 2.91428571, "global_step/max_steps": "6/6", "percentage": "100.00%", "elapsed_time": "41m 9s", "remaining_time": "0s"}
 
 
 
 
 
1
+ {"loss": 0.93286133, "grad_norm": 0.24253744, "learning_rate": 0.00015, "memory(GiB)": 94.72, "train_speed(iter/s)": 0.007964, "rewards/chosen": -0.06799316, "rewards/rejected": -0.03457642, "rewards/accuracies": 0.0, "rewards/margins": -0.03341675, "logps/rejected": -0.34570312, "logps/chosen": -0.68066406, "logits/rejected": -0.13452148, "logits/chosen": -0.43457031, "nll_loss": 0.68066406, "log_odds_ratio": -1.23046875, "log_odds_chosen": -0.87792969, "epoch": 0.11428571, "global_step/max_steps": "1/40", "percentage": "2.50%", "elapsed_time": "1m 47s", "remaining_time": "1h 9m 42s"}
2
+ {"loss": 1.08764648, "grad_norm": 0.64228505, "learning_rate": 0.0003, "memory(GiB)": 94.72, "train_speed(iter/s)": 0.009138, "rewards/chosen": -0.06176758, "rewards/rejected": -0.04656982, "rewards/accuracies": 0.0, "rewards/margins": -0.01519775, "logps/rejected": -0.46533203, "logps/chosen": -0.6171875, "logits/rejected": -0.22705078, "logits/chosen": -0.25598145, "nll_loss": 0.6171875, "log_odds_ratio": -0.88964844, "log_odds_chosen": -0.35449219, "epoch": 0.22857143, "global_step/max_steps": "2/40", "percentage": "5.00%", "elapsed_time": "3m 20s", "remaining_time": "1h 3m 30s"}
3
+ {"loss": 1.34863281, "grad_norm": 4.59718227, "learning_rate": 0.00029211, "memory(GiB)": 94.72, "train_speed(iter/s)": 0.009641, "rewards/chosen": -0.15454102, "rewards/rejected": -0.12097168, "rewards/accuracies": 0.25, "rewards/margins": -0.03356934, "logps/rejected": -1.20898438, "logps/chosen": -1.54492188, "logits/rejected": -0.74609375, "logits/chosen": -1.19921875, "nll_loss": 1.54589844, "log_odds_ratio": -0.96972656, "log_odds_chosen": -0.39697266, "epoch": 0.34285714, "global_step/max_steps": "3/40", "percentage": "7.50%", "elapsed_time": "4m 52s", "remaining_time": "1h 0m 11s"}
4
+ {"loss": 1.03009033, "grad_norm": 0.27393061, "learning_rate": 0.00028421, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009599, "rewards/chosen": -0.07324219, "rewards/rejected": -0.04803467, "rewards/accuracies": 0.0, "rewards/margins": -0.02520752, "logps/rejected": -0.47998047, "logps/chosen": -0.73339844, "logits/rejected": 0.04180908, "logits/chosen": -0.58544922, "nll_loss": 0.73242188, "log_odds_ratio": -1.01660156, "log_odds_chosen": -0.54833984, "epoch": 0.45714286, "global_step/max_steps": "4/40", "percentage": "10.00%", "elapsed_time": "6m 38s", "remaining_time": "59m 45s"}
5
+ {"loss": 3.07617188, "grad_norm": 68.15390015, "learning_rate": 0.00027632, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009684, "rewards/chosen": -0.30126953, "rewards/rejected": -0.29296875, "rewards/accuracies": 0.25, "rewards/margins": -0.00830078, "logps/rejected": -2.92578125, "logps/chosen": -3.01171875, "logits/rejected": -0.52734375, "logits/chosen": -1.07714844, "nll_loss": 3.01171875, "log_odds_ratio": -0.74511719, "log_odds_chosen": -0.09191895, "epoch": 0.57142857, "global_step/max_steps": "5/40", "percentage": "12.50%", "elapsed_time": "8m 17s", "remaining_time": "58m 5s"}
6
+ {"loss": 7.05078125, "grad_norm": 194.47937012, "learning_rate": 0.00026842, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.0099, "rewards/chosen": -0.68261719, "rewards/rejected": -0.68359375, "rewards/accuracies": 0.5, "rewards/margins": 0.00097656, "logps/rejected": -6.8359375, "logps/chosen": -6.8203125, "logits/rejected": -2.94140625, "logits/chosen": -2.94140625, "nll_loss": 6.8359375, "log_odds_ratio": -0.6875, "log_odds_chosen": 0.015625, "epoch": 0.68571429, "global_step/max_steps": "6/40", "percentage": "15.00%", "elapsed_time": "9m 47s", "remaining_time": "55m 30s"}
7
+ {"loss": 9.6640625, "grad_norm": 44.40390396, "learning_rate": 0.00026053, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.010026, "rewards/chosen": -0.96972656, "rewards/rejected": -0.96972656, "rewards/accuracies": 0.5, "rewards/margins": 0.0, "logps/rejected": -9.6875, "logps/chosen": -9.703125, "logits/rejected": -2.18554688, "logits/chosen": -2.17382812, "nll_loss": 9.6875, "log_odds_ratio": -0.703125, "log_odds_chosen": -0.015625, "epoch": 0.8, "global_step/max_steps": "7/40", "percentage": "17.50%", "elapsed_time": "11m 19s", "remaining_time": "53m 25s"}
8
+ {"loss": 9.8203125, "grad_norm": 14.2842617, "learning_rate": 0.00025263, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009441, "rewards/chosen": -0.96386719, "rewards/rejected": -0.96191406, "rewards/accuracies": 0.5, "rewards/margins": -0.00195312, "logps/rejected": -9.625, "logps/chosen": -9.640625, "logits/rejected": -2.12890625, "logits/chosen": -2.1328125, "nll_loss": 9.609375, "log_odds_ratio": -0.7109375, "log_odds_chosen": -0.015625, "epoch": 0.91428571, "global_step/max_steps": "8/40", "percentage": "20.00%", "elapsed_time": "13m 48s", "remaining_time": "55m 15s"}
9
+ {"loss": 6.10742188, "grad_norm": 14.2842617, "learning_rate": 0.00024474, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009823, "rewards/chosen": -0.79427081, "rewards/rejected": -0.79166669, "rewards/accuracies": 0.33333334, "rewards/margins": -0.00260417, "logps/rejected": -7.91666651, "logps/chosen": -7.9375, "logits/rejected": -2.09375, "logits/chosen": -2.078125, "nll_loss": 7.9375, "log_odds_ratio": -0.70572919, "log_odds_chosen": -0.02083333, "epoch": 1.0, "global_step/max_steps": "9/40", "percentage": "22.50%", "elapsed_time": "14m 57s", "remaining_time": "51m 32s"}
10
+ {"loss": 8.80078125, "grad_norm": 20.5291481, "learning_rate": 0.00023684, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009473, "rewards/chosen": -0.86230469, "rewards/rejected": -0.84472656, "rewards/accuracies": 0.25, "rewards/margins": -0.01757812, "logps/rejected": -8.4453125, "logps/chosen": -8.625, "logits/rejected": -3.41015625, "logits/chosen": -3.34375, "nll_loss": 8.578125, "log_odds_ratio": -0.79101562, "log_odds_chosen": -0.1796875, "epoch": 1.11428571, "global_step/max_steps": "10/40", "percentage": "25.00%", "elapsed_time": "17m 17s", "remaining_time": "51m 51s"}
11
+ {"eval_loss": 9.0625, "eval_runtime": 5.0329, "eval_samples_per_second": 0.397, "eval_steps_per_second": 0.199, "eval_rewards/chosen": -0.89453125, "eval_rewards/rejected": -0.90625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 0.01171875, "eval_logps/rejected": -9.0625, "eval_logps/chosen": -8.9375, "eval_logits/rejected": -3.84375, "eval_logits/chosen": -3.78125, "eval_nll_loss": 8.9375, "eval_log_odds_ratio": -0.6328125, "eval_log_odds_chosen": 0.125, "epoch": 1.11428571, "global_step/max_steps": "10/40", "percentage": "25.00%", "elapsed_time": "17m 22s", "remaining_time": "52m 6s"}
12
+ {"loss": 9.82421875, "grad_norm": 46.37237549, "learning_rate": 0.00022895, "memory(GiB)": 122.43, "train_speed(iter/s)": 0.009244, "rewards/chosen": -0.96582031, "rewards/rejected": -0.90820312, "rewards/accuracies": 0.0, "rewards/margins": -0.05761719, "logps/rejected": -9.09375, "logps/chosen": -9.65625, "logits/rejected": -2.69921875, "logits/chosen": -2.65625, "nll_loss": 9.671875, "log_odds_ratio": -1.06347656, "log_odds_chosen": -0.5625, "epoch": 1.22857143, "global_step/max_steps": "11/40", "percentage": "27.50%", "elapsed_time": "19m 31s", "remaining_time": "51m 28s"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:555a621d1a7a55ef965b07972fdea651660831674732ccb543abe2d3845575a0
3
  size 8312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d9699d2a8d44cb87a3292c17458643e17e3de9eae2e502e765f7083dbe5e19d
3
  size 8312