Training in progress, step 10
Browse files- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- args.json +10 -10
- logging.jsonl +12 -8
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -25,11 +25,11 @@
|
|
| 25 |
"target_modules": [
|
| 26 |
"k_proj",
|
| 27 |
"gate_proj",
|
|
|
|
|
|
|
| 28 |
"up_proj",
|
| 29 |
-
"down_proj",
|
| 30 |
"v_proj",
|
| 31 |
-
"
|
| 32 |
-
"o_proj"
|
| 33 |
],
|
| 34 |
"task_type": "CAUSAL_LM",
|
| 35 |
"use_dora": false,
|
|
|
|
| 25 |
"target_modules": [
|
| 26 |
"k_proj",
|
| 27 |
"gate_proj",
|
| 28 |
+
"o_proj",
|
| 29 |
+
"q_proj",
|
| 30 |
"up_proj",
|
|
|
|
| 31 |
"v_proj",
|
| 32 |
+
"down_proj"
|
|
|
|
| 33 |
],
|
| 34 |
"task_type": "CAUSAL_LM",
|
| 35 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6627156248
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d05fc15dabd7abb19233eb4f243c91c5572426a29ca8049df3db628a3458c26
|
| 3 |
size 6627156248
|
args.json
CHANGED
|
@@ -72,7 +72,7 @@
|
|
| 72 |
"custom_register_path": [],
|
| 73 |
"ignore_args_error": false,
|
| 74 |
"use_swift_lora": false,
|
| 75 |
-
"output_dir": "/root/dataDisk/llama-new/output/
|
| 76 |
"overwrite_output_dir": false,
|
| 77 |
"do_train": false,
|
| 78 |
"do_eval": false,
|
|
@@ -83,32 +83,32 @@
|
|
| 83 |
"per_device_eval_batch_size": 1,
|
| 84 |
"per_gpu_train_batch_size": null,
|
| 85 |
"per_gpu_eval_batch_size": null,
|
| 86 |
-
"gradient_accumulation_steps":
|
| 87 |
"eval_accumulation_steps": null,
|
| 88 |
"eval_delay": 0,
|
| 89 |
"torch_empty_cache_steps": null,
|
| 90 |
-
"learning_rate":
|
| 91 |
"weight_decay": 0.01,
|
| 92 |
"adam_beta1": 0.9,
|
| 93 |
"adam_beta2": 0.999,
|
| 94 |
"adam_epsilon": 1e-08,
|
| 95 |
"max_grad_norm": 1.0,
|
| 96 |
-
"num_train_epochs":
|
| 97 |
"max_steps": -1,
|
| 98 |
"lr_scheduler_type": "linear",
|
| 99 |
"lr_scheduler_kwargs": null,
|
| 100 |
-
"warmup_ratio": 0.
|
| 101 |
"warmup_steps": 0,
|
| 102 |
"log_level": "passive",
|
| 103 |
"log_level_replica": "warning",
|
| 104 |
"log_on_each_node": true,
|
| 105 |
-
"logging_dir": "/root/dataDisk/llama-new/output/
|
| 106 |
"logging_strategy": "steps",
|
| 107 |
"logging_first_step": true,
|
| 108 |
"logging_steps": 1,
|
| 109 |
"logging_nan_inf_filter": true,
|
| 110 |
"save_strategy": "steps",
|
| 111 |
-
"save_steps":
|
| 112 |
"save_total_limit": 2,
|
| 113 |
"save_safetensors": true,
|
| 114 |
"save_on_each_node": false,
|
|
@@ -132,7 +132,7 @@
|
|
| 132 |
"tpu_metrics_debug": false,
|
| 133 |
"debug": null,
|
| 134 |
"dataloader_drop_last": false,
|
| 135 |
-
"eval_steps":
|
| 136 |
"dataloader_num_workers": 0,
|
| 137 |
"dataloader_prefetch_factor": null,
|
| 138 |
"past_index": -1,
|
|
@@ -366,8 +366,8 @@
|
|
| 366 |
"local_world_size": 8,
|
| 367 |
"model_suffix": "v3-20250124-205715-merged",
|
| 368 |
"model_info": "ModelInfo(model_type='llama3_2', model_dir='/root/highspeedstorage/ft-volume/v3-20250124-205715-merged', torch_dtype=torch.bfloat16, max_model_len=8192, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)",
|
| 369 |
-
"model_meta": "ModelMeta(model_type='llama3_2', model_groups=[ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.2-1B', hf_model_id='meta-llama/Llama-3.2-1B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B', hf_model_id='meta-llama/Llama-3.2-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-1B-Instruct', hf_model_id='meta-llama/Llama-3.2-1B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B-Instruct', hf_model_id='meta-llama/Llama-3.2-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.3-70B-Instruct', hf_model_id='meta-llama/Llama-3.3-70B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', hf_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='llama3_2', get_function=<function get_model_tokenizer_with_flash_attn at
|
| 370 |
"model_dir": "/root/highspeedstorage/ft-volume/v3-20250124-205715-merged",
|
| 371 |
"hub": "<class 'swift.hub.hub.HFHub'>",
|
| 372 |
-
"training_args": "ORPOConfig(output_dir='/root/dataDisk/llama-new/output/
|
| 373 |
}
|
|
|
|
| 72 |
"custom_register_path": [],
|
| 73 |
"ignore_args_error": false,
|
| 74 |
"use_swift_lora": false,
|
| 75 |
+
"output_dir": "/root/dataDisk/llama-new/output/v12-20250126-143619",
|
| 76 |
"overwrite_output_dir": false,
|
| 77 |
"do_train": false,
|
| 78 |
"do_eval": false,
|
|
|
|
| 83 |
"per_device_eval_batch_size": 1,
|
| 84 |
"per_gpu_train_batch_size": null,
|
| 85 |
"per_gpu_eval_batch_size": null,
|
| 86 |
+
"gradient_accumulation_steps": 4,
|
| 87 |
"eval_accumulation_steps": null,
|
| 88 |
"eval_delay": 0,
|
| 89 |
"torch_empty_cache_steps": null,
|
| 90 |
+
"learning_rate": 0.0003,
|
| 91 |
"weight_decay": 0.01,
|
| 92 |
"adam_beta1": 0.9,
|
| 93 |
"adam_beta2": 0.999,
|
| 94 |
"adam_epsilon": 1e-08,
|
| 95 |
"max_grad_norm": 1.0,
|
| 96 |
+
"num_train_epochs": 5.0,
|
| 97 |
"max_steps": -1,
|
| 98 |
"lr_scheduler_type": "linear",
|
| 99 |
"lr_scheduler_kwargs": null,
|
| 100 |
+
"warmup_ratio": 0.05,
|
| 101 |
"warmup_steps": 0,
|
| 102 |
"log_level": "passive",
|
| 103 |
"log_level_replica": "warning",
|
| 104 |
"log_on_each_node": true,
|
| 105 |
+
"logging_dir": "/root/dataDisk/llama-new/output/v12-20250126-143619/runs",
|
| 106 |
"logging_strategy": "steps",
|
| 107 |
"logging_first_step": true,
|
| 108 |
"logging_steps": 1,
|
| 109 |
"logging_nan_inf_filter": true,
|
| 110 |
"save_strategy": "steps",
|
| 111 |
+
"save_steps": 10.0,
|
| 112 |
"save_total_limit": 2,
|
| 113 |
"save_safetensors": true,
|
| 114 |
"save_on_each_node": false,
|
|
|
|
| 132 |
"tpu_metrics_debug": false,
|
| 133 |
"debug": null,
|
| 134 |
"dataloader_drop_last": false,
|
| 135 |
+
"eval_steps": 10.0,
|
| 136 |
"dataloader_num_workers": 0,
|
| 137 |
"dataloader_prefetch_factor": null,
|
| 138 |
"past_index": -1,
|
|
|
|
| 366 |
"local_world_size": 8,
|
| 367 |
"model_suffix": "v3-20250124-205715-merged",
|
| 368 |
"model_info": "ModelInfo(model_type='llama3_2', model_dir='/root/highspeedstorage/ft-volume/v3-20250124-205715-merged', torch_dtype=torch.bfloat16, max_model_len=8192, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)",
|
| 369 |
+
"model_meta": "ModelMeta(model_type='llama3_2', model_groups=[ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.2-1B', hf_model_id='meta-llama/Llama-3.2-1B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B', hf_model_id='meta-llama/Llama-3.2-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-1B-Instruct', hf_model_id='meta-llama/Llama-3.2-1B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B-Instruct', hf_model_id='meta-llama/Llama-3.2-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.3-70B-Instruct', hf_model_id='meta-llama/Llama-3.3-70B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', hf_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='llama3_2', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f896b3972e0>, model_arch='llama', architectures=['LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=['transformers>=4.45'], tags=[])",
|
| 370 |
"model_dir": "/root/highspeedstorage/ft-volume/v3-20250124-205715-merged",
|
| 371 |
"hub": "<class 'swift.hub.hub.HFHub'>",
|
| 372 |
+
"training_args": "ORPOConfig(output_dir='/root/dataDisk/llama-new/output/v12-20250126-143619', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=4, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0003, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=5.0, max_steps=-1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/root/dataDisk/llama-new/output/v12-20250126-143619/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=10, save_total_limit=2, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=0, dataloader_prefetch_factor=None, past_index=-1, run_name='/root/dataDisk/llama-new/output/v12-20250126-143619', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'cpu', 'pin_memory': True}, 'offload_param': {'device': 'cpu', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['wandb'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=True, resume_from_checkpoint=None, hub_model_id='TheAgenticAI/LLAMA-3.3-70B-Reasoning', hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=True, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs={'use_reentrant': True}, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, max_length=22000, max_prompt_length=None, max_completion_length=None, beta=0.1, disable_dropout=True, label_pad_token_id=None, padding_value=None, truncation_mode='keep_end', generate_during_eval=False, is_encoder_decoder=False, model_init_kwargs=None, dataset_num_proc=1, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)"
|
| 373 |
}
|
logging.jsonl
CHANGED
|
@@ -1,8 +1,12 @@
|
|
| 1 |
-
{"loss": 0.
|
| 2 |
-
{"loss": 1.
|
| 3 |
-
{"loss":
|
| 4 |
-
{"loss":
|
| 5 |
-
{"loss":
|
| 6 |
-
{"loss":
|
| 7 |
-
{"
|
| 8 |
-
{"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"loss": 0.93286133, "grad_norm": 0.24253744, "learning_rate": 0.00015, "memory(GiB)": 94.72, "train_speed(iter/s)": 0.007964, "rewards/chosen": -0.06799316, "rewards/rejected": -0.03457642, "rewards/accuracies": 0.0, "rewards/margins": -0.03341675, "logps/rejected": -0.34570312, "logps/chosen": -0.68066406, "logits/rejected": -0.13452148, "logits/chosen": -0.43457031, "nll_loss": 0.68066406, "log_odds_ratio": -1.23046875, "log_odds_chosen": -0.87792969, "epoch": 0.11428571, "global_step/max_steps": "1/40", "percentage": "2.50%", "elapsed_time": "1m 47s", "remaining_time": "1h 9m 42s"}
|
| 2 |
+
{"loss": 1.08764648, "grad_norm": 0.64228505, "learning_rate": 0.0003, "memory(GiB)": 94.72, "train_speed(iter/s)": 0.009138, "rewards/chosen": -0.06176758, "rewards/rejected": -0.04656982, "rewards/accuracies": 0.0, "rewards/margins": -0.01519775, "logps/rejected": -0.46533203, "logps/chosen": -0.6171875, "logits/rejected": -0.22705078, "logits/chosen": -0.25598145, "nll_loss": 0.6171875, "log_odds_ratio": -0.88964844, "log_odds_chosen": -0.35449219, "epoch": 0.22857143, "global_step/max_steps": "2/40", "percentage": "5.00%", "elapsed_time": "3m 20s", "remaining_time": "1h 3m 30s"}
|
| 3 |
+
{"loss": 1.34863281, "grad_norm": 4.59718227, "learning_rate": 0.00029211, "memory(GiB)": 94.72, "train_speed(iter/s)": 0.009641, "rewards/chosen": -0.15454102, "rewards/rejected": -0.12097168, "rewards/accuracies": 0.25, "rewards/margins": -0.03356934, "logps/rejected": -1.20898438, "logps/chosen": -1.54492188, "logits/rejected": -0.74609375, "logits/chosen": -1.19921875, "nll_loss": 1.54589844, "log_odds_ratio": -0.96972656, "log_odds_chosen": -0.39697266, "epoch": 0.34285714, "global_step/max_steps": "3/40", "percentage": "7.50%", "elapsed_time": "4m 52s", "remaining_time": "1h 0m 11s"}
|
| 4 |
+
{"loss": 1.03009033, "grad_norm": 0.27393061, "learning_rate": 0.00028421, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009599, "rewards/chosen": -0.07324219, "rewards/rejected": -0.04803467, "rewards/accuracies": 0.0, "rewards/margins": -0.02520752, "logps/rejected": -0.47998047, "logps/chosen": -0.73339844, "logits/rejected": 0.04180908, "logits/chosen": -0.58544922, "nll_loss": 0.73242188, "log_odds_ratio": -1.01660156, "log_odds_chosen": -0.54833984, "epoch": 0.45714286, "global_step/max_steps": "4/40", "percentage": "10.00%", "elapsed_time": "6m 38s", "remaining_time": "59m 45s"}
|
| 5 |
+
{"loss": 3.07617188, "grad_norm": 68.15390015, "learning_rate": 0.00027632, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009684, "rewards/chosen": -0.30126953, "rewards/rejected": -0.29296875, "rewards/accuracies": 0.25, "rewards/margins": -0.00830078, "logps/rejected": -2.92578125, "logps/chosen": -3.01171875, "logits/rejected": -0.52734375, "logits/chosen": -1.07714844, "nll_loss": 3.01171875, "log_odds_ratio": -0.74511719, "log_odds_chosen": -0.09191895, "epoch": 0.57142857, "global_step/max_steps": "5/40", "percentage": "12.50%", "elapsed_time": "8m 17s", "remaining_time": "58m 5s"}
|
| 6 |
+
{"loss": 7.05078125, "grad_norm": 194.47937012, "learning_rate": 0.00026842, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.0099, "rewards/chosen": -0.68261719, "rewards/rejected": -0.68359375, "rewards/accuracies": 0.5, "rewards/margins": 0.00097656, "logps/rejected": -6.8359375, "logps/chosen": -6.8203125, "logits/rejected": -2.94140625, "logits/chosen": -2.94140625, "nll_loss": 6.8359375, "log_odds_ratio": -0.6875, "log_odds_chosen": 0.015625, "epoch": 0.68571429, "global_step/max_steps": "6/40", "percentage": "15.00%", "elapsed_time": "9m 47s", "remaining_time": "55m 30s"}
|
| 7 |
+
{"loss": 9.6640625, "grad_norm": 44.40390396, "learning_rate": 0.00026053, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.010026, "rewards/chosen": -0.96972656, "rewards/rejected": -0.96972656, "rewards/accuracies": 0.5, "rewards/margins": 0.0, "logps/rejected": -9.6875, "logps/chosen": -9.703125, "logits/rejected": -2.18554688, "logits/chosen": -2.17382812, "nll_loss": 9.6875, "log_odds_ratio": -0.703125, "log_odds_chosen": -0.015625, "epoch": 0.8, "global_step/max_steps": "7/40", "percentage": "17.50%", "elapsed_time": "11m 19s", "remaining_time": "53m 25s"}
|
| 8 |
+
{"loss": 9.8203125, "grad_norm": 14.2842617, "learning_rate": 0.00025263, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009441, "rewards/chosen": -0.96386719, "rewards/rejected": -0.96191406, "rewards/accuracies": 0.5, "rewards/margins": -0.00195312, "logps/rejected": -9.625, "logps/chosen": -9.640625, "logits/rejected": -2.12890625, "logits/chosen": -2.1328125, "nll_loss": 9.609375, "log_odds_ratio": -0.7109375, "log_odds_chosen": -0.015625, "epoch": 0.91428571, "global_step/max_steps": "8/40", "percentage": "20.00%", "elapsed_time": "13m 48s", "remaining_time": "55m 15s"}
|
| 9 |
+
{"loss": 6.10742188, "grad_norm": 14.2842617, "learning_rate": 0.00024474, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009823, "rewards/chosen": -0.79427081, "rewards/rejected": -0.79166669, "rewards/accuracies": 0.33333334, "rewards/margins": -0.00260417, "logps/rejected": -7.91666651, "logps/chosen": -7.9375, "logits/rejected": -2.09375, "logits/chosen": -2.078125, "nll_loss": 7.9375, "log_odds_ratio": -0.70572919, "log_odds_chosen": -0.02083333, "epoch": 1.0, "global_step/max_steps": "9/40", "percentage": "22.50%", "elapsed_time": "14m 57s", "remaining_time": "51m 32s"}
|
| 10 |
+
{"loss": 8.80078125, "grad_norm": 20.5291481, "learning_rate": 0.00023684, "memory(GiB)": 105.24, "train_speed(iter/s)": 0.009473, "rewards/chosen": -0.86230469, "rewards/rejected": -0.84472656, "rewards/accuracies": 0.25, "rewards/margins": -0.01757812, "logps/rejected": -8.4453125, "logps/chosen": -8.625, "logits/rejected": -3.41015625, "logits/chosen": -3.34375, "nll_loss": 8.578125, "log_odds_ratio": -0.79101562, "log_odds_chosen": -0.1796875, "epoch": 1.11428571, "global_step/max_steps": "10/40", "percentage": "25.00%", "elapsed_time": "17m 17s", "remaining_time": "51m 51s"}
|
| 11 |
+
{"eval_loss": 9.0625, "eval_runtime": 5.0329, "eval_samples_per_second": 0.397, "eval_steps_per_second": 0.199, "eval_rewards/chosen": -0.89453125, "eval_rewards/rejected": -0.90625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 0.01171875, "eval_logps/rejected": -9.0625, "eval_logps/chosen": -8.9375, "eval_logits/rejected": -3.84375, "eval_logits/chosen": -3.78125, "eval_nll_loss": 8.9375, "eval_log_odds_ratio": -0.6328125, "eval_log_odds_chosen": 0.125, "epoch": 1.11428571, "global_step/max_steps": "10/40", "percentage": "25.00%", "elapsed_time": "17m 22s", "remaining_time": "52m 6s"}
|
| 12 |
+
{"loss": 9.82421875, "grad_norm": 46.37237549, "learning_rate": 0.00022895, "memory(GiB)": 122.43, "train_speed(iter/s)": 0.009244, "rewards/chosen": -0.96582031, "rewards/rejected": -0.90820312, "rewards/accuracies": 0.0, "rewards/margins": -0.05761719, "logps/rejected": -9.09375, "logps/chosen": -9.65625, "logits/rejected": -2.69921875, "logits/chosen": -2.65625, "nll_loss": 9.671875, "log_odds_ratio": -1.06347656, "log_odds_chosen": -0.5625, "epoch": 1.22857143, "global_step/max_steps": "11/40", "percentage": "27.50%", "elapsed_time": "19m 31s", "remaining_time": "51m 28s"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d9699d2a8d44cb87a3292c17458643e17e3de9eae2e502e765f7083dbe5e19d
|
| 3 |
size 8312
|