diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/args.json new file mode 100644 index 0000000000000000000000000000000000000000..b3b67557396afa371bd4ae4e246e102ed2523a72 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + 
"lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": 
false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + 
"dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + 
"fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + 
"missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', 
architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, 
run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, 
hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/README.md 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f5ad0d738a546c5dfcb69e5366b5e203ca99083 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "o_proj", + "v_proj", + "q_proj", + "up_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3150437cb5600b99909d5ef26ad5a048bba0a79 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:65c1454e9b23f574813a0d8f37c9d315eb6270429df2915dd88c0e3df28b1dd0 +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/args.json new file mode 100644 index 0000000000000000000000000000000000000000..b3b67557396afa371bd4ae4e246e102ed2523a72 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + 
"/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + 
"warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + 
"sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + 
"eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + 
"reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', 
hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, 
logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 
'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, 
max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c510420f475abb36c21036383d72f00932294355 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee5c5d3fb601ea1ecadf2a3915651d9f3bec32bd965d959462544397fd65cc0 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90de5e6cef3b48ea4df20b5cc41a904d572ba9f6 --- 
/dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec8aa314d0c8b9a55ae5dcec6b210766acadd652175e0d85033f972c697ae11 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..088eff48ab5af5970dd9156f2a1637152101a083 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939638af361ce4b7c5d4bbb02e19af24e502a663a84b16ad2482eb17c342f403 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ba02c83ab101374202d96d797d3e1337b2e91bc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:706eff2fb74340a596775dac6b41bb82d149b2e5f141739be14f0e9e6d9fd2c0 +size 51613616 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea68315a2928c4d2e6f27ccc7d88eca58d97822f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267a7761f2150238fe697a6f24edb96b892ec83313ab6313281986ed648c7c05 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a8c20b1c4540ccafc7943ada6794901db6b1e36 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d11adf355c5a8a37aa5dcf312f28464e47aef16170a6bcea4cc6a23a9b6a5e +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt 
new file mode 100644 index 0000000000000000000000000000000000000000..de1da3c3eb77c4069fa0776edee4b2451ffefa6b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a957c96daf8f86ce8ba38d1103ebe71e47bb4f97de1f1eb7a72bfbabaacc2e5 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..339421ef01ff81fb2e2ecb025f6004177f2e6e68 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca20ce3541c5f2ed1a94341a419e0857a19f4409b2748ded4c7367b21d36d4a6 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca9e149d406f3ba060df00662843e208114186d6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:135a7241ef86deb44c06cb46106b144c6e0e0c6012a18d83a3b9900ee9887115 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..70a1db716db77b20ce69e020211bd624d917fdc9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192dbf9a4ce2f4bf6aa21f7b28309443ecc22114d4c26ab7c31833ee4fd8ed6f +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..633a89f6bcf368edb6854d00497f4df874d8754c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cf6cb84dcdbdfc470e75fcec2424f47825bfc445fc2f5c189a110fd89794a1 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f3a20ba8148315a06d9b6627ed5ee3d3db5255c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241cf79cc46b8720c3383c4d5e07258f71513067605f3af4e98ada709037fe6c +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb97335edaaf12e7d04b57d168811b35742d6e80 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68ea5493995f4bd480618ef95080063c299cb1bf9889e54e67f368e50295cf0 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..382f020457c5f2831932377914bded675ff5039e --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97553c6ea6760e161bf7482142842224a0033997c14b61e006ff2f7ae10c024a +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..161855440189181ba47e10abb51733b1fc4e3093 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8946d6a0f45a11de886c6a8d31a7b51eabf81902df7507f1c93812c6b4b6f01 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..26de83c3f3bce77c08aa0fdc646a85355a613bb0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e371c9fd2aa88159e817c8c5175b22abb35a52efc1c96e22a5a080c415017f82 +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/latest new file mode 100644 index 0000000000000000000000000000000000000000..744ae7dbad571b6f37ec6c7066549494261bb59e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/latest @@ -0,0 +1 @@ +global_step100 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..97f51b498d48145bd9cc14b35f8236b9ec95a4f7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08e59ac81067b262a084604cd3392250166c2841 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..20a24c17b4be2ee59cd5e6682010519318a91e58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..54050f6cf8fb847e2a926e14a7aad2647761521a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..263aae475c49b090bce43f143308192c5bf9a95b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..942ed5d60ae87dce686b33da76a34db404036dc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..57789be3df3983cb8acc1500bf6470ffadb1c578 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32d6e2e7eb7148713b473b0c821a98e616ab6e6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a1fb08c48e9d34df783eb19e7c9d1caf0ed386 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec37c3a15b8d061312402391f2fddb52d623a1416d6d2879a30f184450d844f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..286e11fdc03799ba02d410701635ac4a5c482e02 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/trainer_state.json @@ -0,0 +1,581 @@ +{ + "best_metric": 0.30639648, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90", + "epoch": 2.6315789473684212, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854056570832201, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 
1.337890625, + "memory(GiB)": 10.26, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.087961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 13.591956786084207, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.0830078125, + "memory(GiB)": 20.01, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.06103515625, + "rewards/margins": 0.0452880859375, + "rewards/rejected": 0.0156402587890625, + "step": 5, + "train_speed(iter/s)": 0.140484 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 7.5902824333777845, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5265624523162842, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -621.5999755859375, + "logps/rejected": -703.2000122070312, + "loss": 2.04072265625, + "memory(GiB)": 46.14, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.015625, + "rewards/margins": 0.6595703363418579, + "rewards/rejected": 0.3578124940395355, + "step": 10, + "train_speed(iter/s)": 0.141561 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7734375, + "eval_nll_loss": 0.66015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.46875, + "eval_rewards/margins": 2.125, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.3433, + "eval_samples_per_second": 1.707, + "eval_steps_per_second": 0.427, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.131075980613862, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.720312476158142, + "logits/rejected": -1.673437476158142, + "logps/chosen": 
-642.7999877929688, + "logps/rejected": -580.0, + "loss": 1.05107421875, + "memory(GiB)": 46.14, + "nll_loss": 0.87109375, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.015625, + "rewards/margins": 2.4703125953674316, + "rewards/rejected": 1.545312523841858, + "step": 15, + "train_speed(iter/s)": 0.146121 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.673054658387307, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.7312500476837158, + "logps/chosen": -402.3999938964844, + "logps/rejected": -565.5999755859375, + "loss": 0.83876953125, + "memory(GiB)": 46.14, + "nll_loss": 0.811718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.206250190734863, + "rewards/margins": 4.456250190734863, + "rewards/rejected": 3.7593750953674316, + "step": 20, + "train_speed(iter/s)": 0.148181 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.875, + "eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -180.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.39208984375, + "eval_nll_loss": 0.37890625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 4.9375, + "eval_rewards/rejected": 6.09375, + "eval_runtime": 2.3282, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.3628742287518036, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6453125476837158, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -561.5999755859375, + "logps/rejected": -661.2000122070312, + "loss": 0.58045654296875, + "memory(GiB)": 46.14, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.899999618530273, + "rewards/margins": 6.381249904632568, + "rewards/rejected": 4.528124809265137, + "step": 25, + "train_speed(iter/s)": 0.146228 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7276190764282714, + 
"learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.5343749523162842, + "logits/rejected": -1.7734375, + "logps/chosen": -405.70001220703125, + "logps/rejected": -592.7999877929688, + "loss": 0.5160888671875, + "memory(GiB)": 46.14, + "nll_loss": 0.515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.899999618530273, + "rewards/margins": 8.46875, + "rewards/rejected": 3.450000047683716, + "step": 30, + "train_speed(iter/s)": 0.146969 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.8125, + "eval_logps/chosen": -167.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.348876953125, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.375, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 3.84375, + "eval_runtime": 2.3448, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.8371169486087437, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.728124976158142, + "logits/rejected": -1.7218749523162842, + "logps/chosen": -619.2000122070312, + "logps/rejected": -638.4000244140625, + "loss": 0.568194580078125, + "memory(GiB)": 46.14, + "nll_loss": 0.5648437738418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.912500381469727, + "rewards/margins": 9.756250381469727, + "rewards/rejected": 4.153124809265137, + "step": 35, + "train_speed(iter/s)": 0.14542 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7334467774160591, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.662500023841858, + "logits/rejected": -1.6984374523162842, + "logps/chosen": -471.6000061035156, + "logps/rejected": -683.2000122070312, + "loss": 0.53173828125, + "memory(GiB)": 46.14, + "nll_loss": 0.586718738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.212499618530273, + "rewards/margins": 11.912500381469727, + "rewards/rejected": 
2.2855467796325684, + "step": 40, + "train_speed(iter/s)": 0.146745 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -0.984375, + "eval_logits/rejected": -1.734375, + "eval_logps/chosen": -159.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.32861328125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.1875, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": -0.8515625, + "eval_runtime": 2.3307, + "eval_samples_per_second": 1.716, + "eval_steps_per_second": 0.429, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.546250666668433, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.5578124523162842, + "logits/rejected": -1.59375, + "logps/chosen": -442.3999938964844, + "logps/rejected": -720.4000244140625, + "loss": 0.44443359375, + "memory(GiB)": 47.59, + "nll_loss": 0.44414061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.1875, + "rewards/margins": 15.899999618530273, + "rewards/rejected": -0.7007812261581421, + "step": 45, + "train_speed(iter/s)": 0.145269 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.44204175223844183, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.704687476158142, + "logps/chosen": -491.6000061035156, + "logps/rejected": -678.0, + "loss": 0.50015869140625, + "memory(GiB)": 47.59, + "nll_loss": 0.500781238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.3125, + "rewards/margins": 16.875, + "rewards/rejected": -0.5650390386581421, + "step": 50, + "train_speed(iter/s)": 0.146516 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.671875, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1128.0, + "eval_loss": 0.315185546875, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 15.0625, + "eval_rewards/rejected": -1.1484375, + 
"eval_runtime": 2.3162, + "eval_samples_per_second": 1.727, + "eval_steps_per_second": 0.432, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2875997592353248, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.4328124523162842, + "logits/rejected": -1.529687523841858, + "logps/chosen": -471.6000061035156, + "logps/rejected": -622.4000244140625, + "loss": 0.4770263671875, + "memory(GiB)": 47.59, + "nll_loss": 0.4765625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.575000762939453, + "rewards/margins": 16.112499237060547, + "rewards/rejected": 0.4677734375, + "step": 55, + "train_speed(iter/s)": 0.147535 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.21479272542751, + "learning_rate": 5e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.342187523841858, + "logps/chosen": -517.2000122070312, + "logps/rejected": -496.79998779296875, + "loss": 0.4359375, + "memory(GiB)": 47.59, + "nll_loss": 0.4359374940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.912500381469727, + "rewards/margins": 16.274999618530273, + "rewards/rejected": 0.638476550579071, + "step": 60, + "train_speed(iter/s)": 0.148886 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.828125, + "eval_logits/rejected": -1.6328125, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": -0.546875, + "eval_runtime": 2.3283, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5495600856192168, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.5078125, + "logits/rejected": -1.4968750476837158, + "logps/chosen": -452.3999938964844, + "logps/rejected": -544.7999877929688, + "loss": 0.409503173828125, + "memory(GiB)": 47.59, + 
"nll_loss": 0.4097656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.22265625, + "step": 65, + "train_speed(iter/s)": 0.148787 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.3929300642395231, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.384374976158142, + "logits/rejected": -1.5164062976837158, + "logps/chosen": -402.79998779296875, + "logps/rejected": -641.2000122070312, + "loss": 0.4465576171875, + "memory(GiB)": 47.59, + "nll_loss": 0.44609373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.087499618530273, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.674121081829071, + "step": 70, + "train_speed(iter/s)": 0.149098 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.625, + "eval_logps/chosen": -150.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0625, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.69921875, + "eval_runtime": 2.3424, + "eval_samples_per_second": 1.708, + "eval_steps_per_second": 0.427, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6458764930563424, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.3671875, + "logits/rejected": -1.3984375, + "logps/chosen": -510.0, + "logps/rejected": -680.4000244140625, + "loss": 0.4861083984375, + "memory(GiB)": 47.59, + "nll_loss": 0.486328125, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.462499618530273, + "rewards/margins": 16.649999618530273, + "rewards/rejected": 0.7554687261581421, + "step": 75, + "train_speed(iter/s)": 0.148719 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5294379022539796, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.404687523841858, + "logits/rejected": -1.404687523841858, + 
"logps/chosen": -432.79998779296875, + "logps/rejected": -620.7999877929688, + "loss": 0.46317138671875, + "memory(GiB)": 47.59, + "nll_loss": 0.47539061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.850000381469727, + "rewards/margins": 17.174999237060547, + "rewards/rejected": -0.3223632872104645, + "step": 80, + "train_speed(iter/s)": 0.147985 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 12.875, + "eval_rewards/rejected": 1.25, + "eval_runtime": 2.3227, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5098063945805259, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.2625000476837158, + "logits/rejected": -1.5234375, + "logps/chosen": -390.79998779296875, + "logps/rejected": -747.2000122070312, + "loss": 0.4080902099609375, + "memory(GiB)": 47.59, + "nll_loss": 0.4078125059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.325000762939453, + "rewards/margins": 16.137500762939453, + "rewards/rejected": 1.209570288658142, + "step": 85, + "train_speed(iter/s)": 0.14806 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.2629503293334859, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.2296874523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -589.5999755859375, + "loss": 0.42548675537109376, + "memory(GiB)": 47.59, + "nll_loss": 0.42500001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.5, + "rewards/margins": 16.799999237060547, + "rewards/rejected": 1.658789038658142, + "step": 90, + "train_speed(iter/s)": 0.149448 + }, + { + "epoch": 2.3684210526315788, + 
"eval_logits/chosen": -0.70703125, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.0, + "eval_rewards/rejected": 1.1953125, + "eval_runtime": 2.3883, + "eval_samples_per_second": 1.675, + "eval_steps_per_second": 0.419, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.2528000224499017, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.3624999523162842, + "logits/rejected": -1.4609375, + "logps/chosen": -526.7999877929688, + "logps/rejected": -720.0, + "loss": 0.470208740234375, + "memory(GiB)": 47.59, + "nll_loss": 0.46953123807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 19.25, + "rewards/margins": 18.237499237060547, + "rewards/rejected": 1.026757836341858, + "step": 95, + "train_speed(iter/s)": 0.148517 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.4249614464684851, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.396875023841858, + "logits/rejected": -1.5390625, + "logps/chosen": -406.20001220703125, + "logps/rejected": -563.2000122070312, + "loss": 0.4023651123046875, + "memory(GiB)": 56.84, + "nll_loss": 0.40253907442092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.762500762939453, + "rewards/margins": 17.0625, + "rewards/rejected": -0.31132811307907104, + "step": 100, + "train_speed(iter/s)": 0.14936 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 1.0, + "eval_runtime": 2.3404, + "eval_samples_per_second": 1.709, + "eval_steps_per_second": 0.427, + "step": 100 + } + ], + 
"logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 316219562655744.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d6eb2d3b8b7c4320d19ebec79bd57b1c49c6f84 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea3fd7d1178b3676ea45fdfde0e899a240c24d4d03e55eb09be073e4c532e11 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-100/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. 
def get_model_state_file(checkpoint_dir, zero_stage):
    """
    Return the path of the rank-0 model states file stored in ``checkpoint_dir``.

    Args:
        - ``checkpoint_dir``: folder that is expected to hold the model states file
        - ``zero_stage``: ZeRO stage of the checkpoint; stages ``<= 2`` and stage ``3`` use different file names

    Returns:
        - path of the model states file (``checkpoint_dir`` joined with the stage-specific file name)

    Raises:
        - ``FileNotFoundError``: ``checkpoint_dir`` is not a directory, or the expected file is missing
        - ``ValueError``: ``zero_stage`` is neither ``<= 2`` nor ``3``
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # without this branch ``file`` would be unbound below and surface as an UnboundLocalError
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file
def parse_optim_states(files, ds_checkpoint_dir):
    """
    Load the ``*_optim_states.pt`` shards and pull the fp32 weight groups out of each of them.

    Args:
        - ``files``: paths of the optimizer state shard files to load
        - ``ds_checkpoint_dir``: checkpoint folder the shards belong to (only used in error messages)

    Returns:
        - tuple ``(zero_stage, world_size, fp32_flat_groups)``; ``fp32_flat_groups`` has one entry per loaded shard

    Raises:
        - ``ValueError``: no files were given, the first shard has no zero stage entry, the number of shards
          differs from the recorded partition count, or the zero stage is unknown
    """
    total_files = len(files)
    if total_files == 0:
        # fail early with a clear message instead of an IndexError on state_dicts[0] below
        raise ValueError(f"no '*_optim_states.pt' files were given for '{ds_checkpoint_dir}'")

    state_dicts = []
    for f in tqdm(files, desc='Loading checkpoint shards'):
        state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False)
        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
        # and also handle the case where it was already removed by another helper script
        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(state_dict)

    first_optim_state = state_dicts[0][OPTIMIZER_STATE_DICT]
    if ZERO_STAGE not in first_optim_state:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = first_optim_state[ZERO_STAGE]
    world_size = first_optim_state[PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.
    if isinstance(world_size, list):
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage <= 2:
        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
    elif zero_stage == 3:
        fp32_groups_key = FP32_FLAT_GROUPS
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    fp32_flat_groups = [sd[OPTIMIZER_STATE_DICT][fp32_groups_key] for sd in state_dicts]
    return zero_stage, world_size, fp32_flat_groups
def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """
    Rebuild the trainable parameters of a ZeRO stage <= 2 checkpoint and store them in ``state_dict``.

    For every param group the per-rank flat tensors are concatenated, then each parameter is cut out of
    the merged vector in the order given by ``param_shapes`` and reshaped to its recorded shape.

    Args:
        - ``state_dict``: mapping that receives one entry per reconstructed parameter (modified in place)
        - ``world_size``: number of partitions; also drives the ``2 * world_size`` alignment check
        - ``fp32_flat_groups``: per rank, a sequence with one flat fp32 tensor per param group
        - ``zero_model_states``: parsed model states; only ``[0].param_shapes`` is read

    Raises:
        - ``ValueError``: after alignment, the consumed element count differs from the available one
    """
    param_shapes = zero_model_states[0].param_shapes

    def _numel(shape):
        # shapes exposing a callable ``numel`` use it; anything else is treated as an iterable of ints
        numel_fn = getattr(shape, 'numel', None)
        return numel_fn() if callable(numel_fn) else math.prod(shape)

    # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
    # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
    # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
    # live optimizer object, so we are checking that the numbers are within the right range
    align_to = 2 * world_size

    def zero2_align(x):
        # exact integer ceiling, no float round trip
        return align_to * -(-x // align_to)

    # Reconstruction protocol:
    #
    # XXX: document this

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_single_partition_of_fp32_groups = [
        torch.cat([rank_groups[i] for rank_groups in fp32_flat_groups], 0) for i in range(num_param_groups)
    ]
    avail_numel = sum(merged.numel() for merged in merged_single_partition_of_fp32_groups)

    if debug:
        wanted_params = sum(len(shapes) for shapes in param_shapes)
        # same numel fallback as the reconstruction loop below, so debug mode accepts the same shapes
        wanted_numel = sum(sum(_numel(shape) for shape in shapes.values()) for shapes in param_shapes)
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
        offset = 0
        avail_numel = full_single_fp32_vector.numel()
        for name, shape in shapes.items():
            unpartitioned_numel = _numel(shape)
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """
    Compute how a parameter with ``unpartitioned_numel`` elements is split across ``world_size`` partitions.

    Args:
        - ``unpartitioned_numel``: element count of the full parameter
        - ``world_size``: number of partitions

    Returns:
        - tuple ``(partitioned_numel, padding_numel)``: elements per partition (rounded up) and the
          number of elements needed to round ``unpartitioned_numel`` up to a multiple of ``world_size``
    """
    remainder = unpartitioned_numel % world_size
    padding_numel = (world_size - remainder) if remainder else 0
    # integer ceiling division: math.ceil(a / b) goes through a float and loses exactness for very large counts
    partitioned_numel = -(-unpartitioned_numel // world_size)
    return partitioned_numel, padding_numel
class GatheredTensor:
    """
    A pseudo tensor that collects partitioned weights.
    It is more memory efficient when there are multiple groups.

    The real tensor is only built when ``contiguous()`` is called.
    """

    def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape):
        """
        Args:
            - ``flat_groups``: per rank, a sequence of flat tensors (one per group)
            - ``flat_groups_offset``: start offset of every group followed by the total size
              (one more entry than there are groups)
            - ``offset``: start of this parameter's partition in the per-rank flat space
            - ``partitioned_numel``: number of elements each rank holds for this parameter
            - ``shape``: full shape of the parameter; must provide ``numel()``
        """
        self.flat_groups = flat_groups
        self.flat_groups_offset = flat_groups_offset
        self.offset = offset
        self.partitioned_numel = partitioned_numel
        self.shape = shape
        self.dtype = self.flat_groups[0][0].dtype

    def contiguous(self):
        """
        Merge partitioned weights from flat_groups into a single tensor.

        Returns:
            - tensor of shape ``self.shape`` built from the per-rank slices; trailing padding is dropped

        Raises:
            - ``ValueError``: the range ``[offset, offset + partitioned_numel)`` is not covered by the groups
        """
        if self.shape.numel() == 0:
            # a zero-element parameter has nothing to gather and would not be matched by the group lookup
            return torch.empty(self.shape, dtype=self.dtype)

        offsets = self.flat_groups_offset
        end_idx = self.offset + self.partitioned_numel

        # the groups touched by this parameter are the same for every rank, so locate them once;
        # stop one short of the last entry because each test reads offsets[group_id + 1]
        start_group_id = None
        end_group_id = None
        for group_id in range(len(offsets) - 1):
            if offsets[group_id] <= self.offset < offsets[group_id + 1]:
                start_group_id = group_id
            if offsets[group_id] < end_idx <= offsets[group_id + 1]:
                end_group_id = group_id
                break
        if start_group_id is None or end_group_id is None:
            raise ValueError(f"range [{self.offset}, {end_idx}) is not covered by the flat groups")

        pad_flat_param_chunks = []
        for flat_groups_at_rank_i in self.flat_groups:
            # collect weights from related group/groups
            for group_id in range(start_group_id, end_group_id + 1):
                group_start = offsets[group_id]
                flat_tensor = flat_groups_at_rank_i[group_id]
                # clamp to the group start: for every group after the first one the slice begins at 0,
                # an unclamped ``offset - group_start`` would be negative and index from the end
                start_offset = max(self.offset, group_start) - group_start
                end_offset = min(end_idx, offsets[group_id + 1]) - group_start
                pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset])

        # collect weights from all ranks
        pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0)
        param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous()
        return param
def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir,
                                             tag=None,
                                             exclude_frozen_parameters=False,
                                             lazy_mode=False):
    """
    Build one consolidated fp32 ``state_dict`` out of a ZeRO 2 or 3 checkpoint. The result can be passed to
    ``load_state_dict()`` and used for training without DeepSpeed, or shared with others, for example
    via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint, e.g. ``global_step14``. When it is
          ``None`` the tag is read from the ``latest`` file inside ``checkpoint_dir``.
        - ``exclude_frozen_parameters``: exclude frozen parameters
        - ``lazy_mode``: return the reconstructed dict as is, i.e. a dict of pseudo tensors instead of torch
          tensors, which is more memory efficient. Turn a pseudo tensor into a torch tensor with ``.contiguous()``

    Returns:
        - pytorch ``state_dict``

    Raises:
        - ``ValueError``: ``tag`` is ``None`` and there is no ``latest`` file in ``checkpoint_dir``
        - ``FileNotFoundError``: the folder ``checkpoint_dir/tag`` does not exist

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.

    Note: the above usage may not work if your application doesn't have sufficient free CPU memory.
    You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
    the checkpoint. Or you can load state_dict in lazy mode ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu
        for name, lazy_tensor in state_dict.items():
            tensor = lazy_tensor.contiguous()  # to cpu
            print(name, tensor)
            # del tensor to release memory if it no longer in use
    """
    if tag is None:
        # no explicit tag: the 'latest' file names the checkpoint sub-folder to use
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if not os.path.isfile(latest_path):
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")
        with open(latest_path, 'r') as handle:
            tag = handle.read().strip()

    shard_dir = os.path.join(checkpoint_dir, tag)
    if not os.path.isdir(shard_dir):
        raise FileNotFoundError(f"Directory '{shard_dir}' doesn't exist")

    reconstructed = _get_fp32_state_dict_from_zero_checkpoint(shard_dir, exclude_frozen_parameters)
    # lazy mode hands the reconstructed mapping back untouched; otherwise it is converted entry by entry
    return reconstructed if lazy_mode else to_torch_tensor(reconstructed)
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + 
+ + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f5ad0d738a546c5dfcb69e5366b5e203ca99083 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "o_proj", + "v_proj", + "q_proj", + "up_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45bf84d867ccb626254f50d0ff20ce48c9e0f397 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:728aa9ce19e5c287cab961313a886b7a65da22f400dbd653107e6eb105b9c407 +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/args.json new file mode 100644 
index 0000000000000000000000000000000000000000..b3b67557396afa371bd4ae4e246e102ed2523a72 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": 
[], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..18749e058b093a9f48e8b987a34cff1b88ebec64 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f70ecc0ffdb86a587ea78f97525f85b1739a2eb685d256e11a6fbd11971612 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f877bf7152ec65064132a03bee51b29757c03d45 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efdb183d9ed59271d196c18f5b22f9340f72a953ffe531585cb2ecd0ceb3b2b2 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccf8189677090a52a32d118ed20f844b117fd8f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e4bb739e6bc2923a8771de1e1763ebebf39a52ac23406a18eb897ca15d210915 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..54180306ec6be74cbcd955f96e7b969271c8b5c4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1afbc3b21be91533ff67ddd048fb61647cb3f917fb3f671d736f9dd48abf1028 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d9ba3a02db332e6a08455124a4b3bdacadbd708 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3267f40ad1878d8299afc8d58e168b615dc3701b62b5d80f9399761390e50a +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..985182c7087cce9f7685661c8483bbf5aeb2019d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de1998923451ee0587364a9a372e52efe5835737bc18500a1893399aead438cd +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ed685f640b9057a359662da28a8b6d7350d1e63 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c206dff74b64cb606660cd5b6259cf84e795b50d36d8b1724f649a1f0d564056 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..24f7589506682cf9164960b5e25d06fb0cc24194 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f2a7b9c43b98b4598471299ba7a39f0e1fda6947466209a1302a81597dbe52 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9b6dfaee1368c16d95dda01000019032e8f73eb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cfb7fb8bbdac3c53af8aacd47d8558291e75bfd01d7b4edbe195c9b7cf3264e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22eb91a1a8757debe9f8787fbde77224d9b17e96 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244656b5370a022133d56ecf244f500ddb96315b5edeacf2c2be253b20caad1e +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89a731a2b11066ecf64bf9c72ac900cb8fe83c71 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff9e36d9838e901f5dd8cff2f4a6cec5aac98985915b36a5da4d6ed344e3fe6 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f51dfbcf6981eb37c0a945576f20bfb0f4d48e68 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a216e82bfd7548b89fa07cd74b4a0f7844f56996bb401be1c7104f1acce66d5 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..da6fdcf31e89d7dae19ca2b9e5a22bd5a2b1797f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43196b68f0349fc7030997a40ded0d6ff8fa2b0eb3217108654fc3b97836a3cf +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fc78d32361c82899f602e80a67778d52299f38b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f727b2aa45fccd37f5e22c05d8c76daf0653f24c8d7f2c82f9b77344846d202 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43bd2ed2136f04bbd33135ddbcba8eb278e25d8d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0704c3c886966fa12eb8c4e178097af9d0278e93024f996e1342c72be1979b7f +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5ad37553a29d9b1ee0b918421b9b2d6d3ece89b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2714c41054a80bc60e001a072d6081cb1dd55a0b44099c185f6e941d8701980 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/latest new file mode 100644 index 0000000000000000000000000000000000000000..a9a22a69382a7711ca9e8ab6945c6d2cc8984927 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/latest @@ -0,0 +1 @@ +global_step110 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..584f4a4a43f100f35696d7314a633631af587f25 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_0.pth @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..05b027a867e5e9cebd446293ecff82cfb240cc76 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..af98f0dfe2a5d89fbccf90df58246a0b078c7016 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..715aa4a4ee3915f810fc2bacb2153eb8a0913781 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7bde70899833455b6ee4a99aff9388abc5ffe92 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..90cdeaa2fe438098e9d95ddbc06c765e51af1e78 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..2bd30529614c5be239cd9477af6bef0e313740b6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bed311094effd49cc2c89237c675f56eade157d1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38b1a376e7c81e3c533cf8a69ddf4eefa9d1336c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0767a9fe84680a5a8a76633a443cb301092115c026c1f5f7f1fbdc53dd7f856f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e4dc9546e01625379ccfbed949880d285aa46bed --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.30639648, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90", + "epoch": 2.8947368421052633, + "eval_steps": 10, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854056570832201, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 10.26, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.087961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 13.591956786084207, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.0830078125, + "memory(GiB)": 20.01, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.06103515625, + "rewards/margins": 0.0452880859375, + "rewards/rejected": 0.0156402587890625, + "step": 5, + "train_speed(iter/s)": 0.140484 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 7.5902824333777845, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5265624523162842, + "logits/rejected": -1.6375000476837158, + "logps/chosen": 
-621.5999755859375, + "logps/rejected": -703.2000122070312, + "loss": 2.04072265625, + "memory(GiB)": 46.14, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.015625, + "rewards/margins": 0.6595703363418579, + "rewards/rejected": 0.3578124940395355, + "step": 10, + "train_speed(iter/s)": 0.141561 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7734375, + "eval_nll_loss": 0.66015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.46875, + "eval_rewards/margins": 2.125, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.3433, + "eval_samples_per_second": 1.707, + "eval_steps_per_second": 0.427, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.131075980613862, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.720312476158142, + "logits/rejected": -1.673437476158142, + "logps/chosen": -642.7999877929688, + "logps/rejected": -580.0, + "loss": 1.05107421875, + "memory(GiB)": 46.14, + "nll_loss": 0.87109375, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.015625, + "rewards/margins": 2.4703125953674316, + "rewards/rejected": 1.545312523841858, + "step": 15, + "train_speed(iter/s)": 0.146121 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.673054658387307, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.7312500476837158, + "logps/chosen": -402.3999938964844, + "logps/rejected": -565.5999755859375, + "loss": 0.83876953125, + "memory(GiB)": 46.14, + "nll_loss": 0.811718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.206250190734863, + "rewards/margins": 4.456250190734863, + "rewards/rejected": 3.7593750953674316, + "step": 20, + "train_speed(iter/s)": 0.148181 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.875, + 
"eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -180.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.39208984375, + "eval_nll_loss": 0.37890625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 4.9375, + "eval_rewards/rejected": 6.09375, + "eval_runtime": 2.3282, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.3628742287518036, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6453125476837158, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -561.5999755859375, + "logps/rejected": -661.2000122070312, + "loss": 0.58045654296875, + "memory(GiB)": 46.14, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.899999618530273, + "rewards/margins": 6.381249904632568, + "rewards/rejected": 4.528124809265137, + "step": 25, + "train_speed(iter/s)": 0.146228 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7276190764282714, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.5343749523162842, + "logits/rejected": -1.7734375, + "logps/chosen": -405.70001220703125, + "logps/rejected": -592.7999877929688, + "loss": 0.5160888671875, + "memory(GiB)": 46.14, + "nll_loss": 0.515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.899999618530273, + "rewards/margins": 8.46875, + "rewards/rejected": 3.450000047683716, + "step": 30, + "train_speed(iter/s)": 0.146969 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.8125, + "eval_logps/chosen": -167.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.348876953125, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.375, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 3.84375, + "eval_runtime": 2.3448, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 30 + }, + { + "epoch": 
0.9210526315789473, + "grad_norm": 0.8371169486087437, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.728124976158142, + "logits/rejected": -1.7218749523162842, + "logps/chosen": -619.2000122070312, + "logps/rejected": -638.4000244140625, + "loss": 0.568194580078125, + "memory(GiB)": 46.14, + "nll_loss": 0.5648437738418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.912500381469727, + "rewards/margins": 9.756250381469727, + "rewards/rejected": 4.153124809265137, + "step": 35, + "train_speed(iter/s)": 0.14542 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7334467774160591, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.662500023841858, + "logits/rejected": -1.6984374523162842, + "logps/chosen": -471.6000061035156, + "logps/rejected": -683.2000122070312, + "loss": 0.53173828125, + "memory(GiB)": 46.14, + "nll_loss": 0.586718738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.212499618530273, + "rewards/margins": 11.912500381469727, + "rewards/rejected": 2.2855467796325684, + "step": 40, + "train_speed(iter/s)": 0.146745 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -0.984375, + "eval_logits/rejected": -1.734375, + "eval_logps/chosen": -159.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.32861328125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.1875, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": -0.8515625, + "eval_runtime": 2.3307, + "eval_samples_per_second": 1.716, + "eval_steps_per_second": 0.429, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.546250666668433, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.5578124523162842, + "logits/rejected": -1.59375, + "logps/chosen": -442.3999938964844, + "logps/rejected": -720.4000244140625, + "loss": 0.44443359375, + "memory(GiB)": 47.59, + "nll_loss": 0.44414061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.1875, + 
"rewards/margins": 15.899999618530273, + "rewards/rejected": -0.7007812261581421, + "step": 45, + "train_speed(iter/s)": 0.145269 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.44204175223844183, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.704687476158142, + "logps/chosen": -491.6000061035156, + "logps/rejected": -678.0, + "loss": 0.50015869140625, + "memory(GiB)": 47.59, + "nll_loss": 0.500781238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.3125, + "rewards/margins": 16.875, + "rewards/rejected": -0.5650390386581421, + "step": 50, + "train_speed(iter/s)": 0.146516 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.671875, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1128.0, + "eval_loss": 0.315185546875, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 15.0625, + "eval_rewards/rejected": -1.1484375, + "eval_runtime": 2.3162, + "eval_samples_per_second": 1.727, + "eval_steps_per_second": 0.432, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2875997592353248, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.4328124523162842, + "logits/rejected": -1.529687523841858, + "logps/chosen": -471.6000061035156, + "logps/rejected": -622.4000244140625, + "loss": 0.4770263671875, + "memory(GiB)": 47.59, + "nll_loss": 0.4765625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.575000762939453, + "rewards/margins": 16.112499237060547, + "rewards/rejected": 0.4677734375, + "step": 55, + "train_speed(iter/s)": 0.147535 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.21479272542751, + "learning_rate": 5e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.342187523841858, + "logps/chosen": -517.2000122070312, + "logps/rejected": -496.79998779296875, + "loss": 0.4359375, + "memory(GiB)": 47.59, + "nll_loss": 
0.4359374940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.912500381469727, + "rewards/margins": 16.274999618530273, + "rewards/rejected": 0.638476550579071, + "step": 60, + "train_speed(iter/s)": 0.148886 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.828125, + "eval_logits/rejected": -1.6328125, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": -0.546875, + "eval_runtime": 2.3283, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5495600856192168, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.5078125, + "logits/rejected": -1.4968750476837158, + "logps/chosen": -452.3999938964844, + "logps/rejected": -544.7999877929688, + "loss": 0.409503173828125, + "memory(GiB)": 47.59, + "nll_loss": 0.4097656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.22265625, + "step": 65, + "train_speed(iter/s)": 0.148787 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.3929300642395231, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.384374976158142, + "logits/rejected": -1.5164062976837158, + "logps/chosen": -402.79998779296875, + "logps/rejected": -641.2000122070312, + "loss": 0.4465576171875, + "memory(GiB)": 47.59, + "nll_loss": 0.44609373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.087499618530273, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.674121081829071, + "step": 70, + "train_speed(iter/s)": 0.149098 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.625, + "eval_logps/chosen": -150.0, + "eval_logps/rejected": -1104.0, + 
"eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0625, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.69921875, + "eval_runtime": 2.3424, + "eval_samples_per_second": 1.708, + "eval_steps_per_second": 0.427, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6458764930563424, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.3671875, + "logits/rejected": -1.3984375, + "logps/chosen": -510.0, + "logps/rejected": -680.4000244140625, + "loss": 0.4861083984375, + "memory(GiB)": 47.59, + "nll_loss": 0.486328125, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.462499618530273, + "rewards/margins": 16.649999618530273, + "rewards/rejected": 0.7554687261581421, + "step": 75, + "train_speed(iter/s)": 0.148719 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5294379022539796, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.404687523841858, + "logits/rejected": -1.404687523841858, + "logps/chosen": -432.79998779296875, + "logps/rejected": -620.7999877929688, + "loss": 0.46317138671875, + "memory(GiB)": 47.59, + "nll_loss": 0.47539061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.850000381469727, + "rewards/margins": 17.174999237060547, + "rewards/rejected": -0.3223632872104645, + "step": 80, + "train_speed(iter/s)": 0.147985 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 12.875, + "eval_rewards/rejected": 1.25, + "eval_runtime": 2.3227, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5098063945805259, + "learning_rate": 1.6760206719303105e-05, + 
"logits/chosen": -1.2625000476837158, + "logits/rejected": -1.5234375, + "logps/chosen": -390.79998779296875, + "logps/rejected": -747.2000122070312, + "loss": 0.4080902099609375, + "memory(GiB)": 47.59, + "nll_loss": 0.4078125059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.325000762939453, + "rewards/margins": 16.137500762939453, + "rewards/rejected": 1.209570288658142, + "step": 85, + "train_speed(iter/s)": 0.14806 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.2629503293334859, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.2296874523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -589.5999755859375, + "loss": 0.42548675537109376, + "memory(GiB)": 47.59, + "nll_loss": 0.42500001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.5, + "rewards/margins": 16.799999237060547, + "rewards/rejected": 1.658789038658142, + "step": 90, + "train_speed(iter/s)": 0.149448 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -0.70703125, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.0, + "eval_rewards/rejected": 1.1953125, + "eval_runtime": 2.3883, + "eval_samples_per_second": 1.675, + "eval_steps_per_second": 0.419, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.2528000224499017, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.3624999523162842, + "logits/rejected": -1.4609375, + "logps/chosen": -526.7999877929688, + "logps/rejected": -720.0, + "loss": 0.470208740234375, + "memory(GiB)": 47.59, + "nll_loss": 0.46953123807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 19.25, + "rewards/margins": 18.237499237060547, + "rewards/rejected": 1.026757836341858, + "step": 95, + "train_speed(iter/s)": 0.148517 + }, + { 
+ "epoch": 2.6315789473684212, + "grad_norm": 0.4249614464684851, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.396875023841858, + "logits/rejected": -1.5390625, + "logps/chosen": -406.20001220703125, + "logps/rejected": -563.2000122070312, + "loss": 0.4023651123046875, + "memory(GiB)": 56.84, + "nll_loss": 0.40253907442092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.762500762939453, + "rewards/margins": 17.0625, + "rewards/rejected": -0.31132811307907104, + "step": 100, + "train_speed(iter/s)": 0.14936 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 1.0, + "eval_runtime": 2.3404, + "eval_samples_per_second": 1.709, + "eval_steps_per_second": 0.427, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.5639157494773256, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.41796875, + "logits/rejected": -1.4953124523162842, + "logps/chosen": -432.54998779296875, + "logps/rejected": -568.7999877929688, + "loss": 0.41885986328125, + "memory(GiB)": 56.84, + "nll_loss": 0.41874998807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.112499237060547, + "rewards/margins": 16.299999237060547, + "rewards/rejected": 0.7904297113418579, + "step": 105, + "train_speed(iter/s)": 0.149009 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.4926501392943744, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.357812523841858, + "logits/rejected": -1.529687523841858, + "logps/chosen": -453.6000061035156, + "logps/rejected": -603.5999755859375, + "loss": 0.39718017578125, + "memory(GiB)": 56.84, + "nll_loss": 0.3970703184604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 
17.862499237060547, + "rewards/margins": 17.375, + "rewards/rejected": 0.46240234375, + "step": 110, + "train_speed(iter/s)": 0.149381 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -0.6953125, + "eval_logits/rejected": -1.5859375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.3076171875, + "eval_nll_loss": 0.30859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.125, + "eval_rewards/rejected": 1.046875, + "eval_runtime": 2.3273, + "eval_samples_per_second": 1.719, + "eval_steps_per_second": 0.43, + "step": 110 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 347389503471616.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d6eb2d3b8b7c4320d19ebec79bd57b1c49c6f84 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea3fd7d1178b3676ea45fdfde0e899a240c24d4d03e55eb09be073e4c532e11 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-110/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + 
+ + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f5ad0d738a546c5dfcb69e5366b5e203ca99083 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "o_proj", + "v_proj", + "q_proj", + "up_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ac02c2456a42f0118b085d01bb6ab6ddb51ce04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3e83dbedfcd6dae8cbb1c03c343330f7ea0f860944742b807acbbea6979047 +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/args.json new file mode 100644 
index 0000000000000000000000000000000000000000..b3b67557396afa371bd4ae4e246e102ed2523a72 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": 
[], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..15bf71e609af570742beebf48aa3c84870119401 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade14b52de1719023d66a0b8778a3941926df5407356df7da078d7ab7185f071 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c88991afca394ac01237e84f424190e9e7892898 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59476e8ea01ebc2eb0f41c3b8b0c1b14c77ca58869828777b8de086b1ce3848c +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d573765650b3d480409fee593441f74df5970979 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:38a01dab4e15a7b0b1e015e3e5bc11b53fb2e8c2fbd2f9a1d99676f5e1ef7c73 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..52e8f93087e976beec71672acce2ae945dd8dc61 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4d8c0294fb3f77d9822139c8200806513924c14c34f1962bc52f0e6facd8d5 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0096130035edec4166f67a43a4987cb659c923a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eca25bab764adffb48b907505ae9e0d38914aa3b27735ad1f6c7f35300e874d +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4df9c8107189cb4a88a4a18489b374a163353ad6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27c836385214acce0e7491318d16e00d4e60a83d7c837b76f9be774174c2b6dd +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bb4bbfe700cffdd73fefced0be4661058684c17 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4028eb7251684d2ca2bfc86d2b8b1a8e7ef4ae2bbbc921446d7bcae51b6bdade +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8ca984303e1b791213e0d7ea82a52fc0d67a619 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ab6238d32b6feb9058efc18ff8f016cb0b29b57f4ad69f93c70ccc621ea908 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8459674d5176bf1de6e3e48ebdb88d1c08100ceb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de1c58d4f94c2dd9e1c44f5bdcc3b6161fa2d62e7556033df0903635c4e1b80f +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43eec646f564dc9e76d5e8f3171c89a45c056565 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb2a4efc3e314ebb2f3cfcf6b8de4f301096f2cba69514fec3a1954e4286794 +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f9873b05462fb31c872408c1fbc5fa06b1a5106 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8478ec52fe15cafbacba6a99ea433ac446dc52c8f7a424ad900dad132b6b710b +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8611ec147ba2721aab0a20b09c7a2b311ceaa65a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75c2cac370dd83bf6233c436aeffaac04de47b8a8a1f4f4c6e77b940ffe8b07 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..bfe9507b95e850e9c88a9948976528b6003497ee --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb0a4cb7bd6c2886133af02a6a8d0a4e62892916c1de4f3b2ff45873e8e612d +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..00fa8dd30af72b20b7d1f1266d652e4ab8b6d5e5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003881962508336e23d4fc3b713678d96db0284cf85af9600165a7e3bd006fd2 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2edd70e809603a7907dceeed982d3c475046fa6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9ec29ddcf52e54e9ec33eb915e80f653168bf5c89efb4d8b1422ab6b66635df8 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..262e0ecfae1c1f9e8c14e118a094bd550a8b5d44 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c29a1f7426bafff0a760e962066b2fa7f441e039a0e792ab9f6f3c7c9d55eb6 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/latest new file mode 100644 index 0000000000000000000000000000000000000000..aad80f76777fd4d23b0b81026f4601524335cbe1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/latest @@ -0,0 +1 @@ +global_step114 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..80f51268a9828e9592a20d8ae8b2cd4ba4bc362c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_0.pth @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..62e581603d525611f9660b6e859462f72bbc9258 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..18b03e632222a58e33ea4fca874b9c52628cc5e1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..41735b5f7ace13ffa57ebed3e7042f1a48ac17fb --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebbb93c1d99b1645075ea27fc9fae66992a691f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0656f84b77a33c9ceba9df16f36437b55ef71bc7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..32b3a313372ee4a2eeaeed69789f8fb4e2c70ad0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c5a45264129fe1d7c409a6867de1a9751476a8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a86ac614a477eb67963adb2c8c07f37c79ded059 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7a9fd18bda7faa50931342147a7de5605bed0f91f6c70d821e84b7bf8f444f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b09b5159d3042b35b902b186f20cf2cc5f9e16c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/trainer_state.json @@ -0,0 +1,651 @@ +{ + "best_metric": 0.30639648, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854056570832201, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 10.26, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.087961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 13.591956786084207, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.0830078125, + "memory(GiB)": 20.01, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.06103515625, + "rewards/margins": 0.0452880859375, + "rewards/rejected": 0.0156402587890625, + "step": 5, + "train_speed(iter/s)": 0.140484 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 7.5902824333777845, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5265624523162842, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -621.5999755859375, + 
"logps/rejected": -703.2000122070312, + "loss": 2.04072265625, + "memory(GiB)": 46.14, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.015625, + "rewards/margins": 0.6595703363418579, + "rewards/rejected": 0.3578124940395355, + "step": 10, + "train_speed(iter/s)": 0.141561 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7734375, + "eval_nll_loss": 0.66015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.46875, + "eval_rewards/margins": 2.125, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.3433, + "eval_samples_per_second": 1.707, + "eval_steps_per_second": 0.427, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.131075980613862, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.720312476158142, + "logits/rejected": -1.673437476158142, + "logps/chosen": -642.7999877929688, + "logps/rejected": -580.0, + "loss": 1.05107421875, + "memory(GiB)": 46.14, + "nll_loss": 0.87109375, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.015625, + "rewards/margins": 2.4703125953674316, + "rewards/rejected": 1.545312523841858, + "step": 15, + "train_speed(iter/s)": 0.146121 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.673054658387307, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.7312500476837158, + "logps/chosen": -402.3999938964844, + "logps/rejected": -565.5999755859375, + "loss": 0.83876953125, + "memory(GiB)": 46.14, + "nll_loss": 0.811718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.206250190734863, + "rewards/margins": 4.456250190734863, + "rewards/rejected": 3.7593750953674316, + "step": 20, + "train_speed(iter/s)": 0.148181 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.875, + "eval_logits/rejected": 
-1.8203125, + "eval_logps/chosen": -180.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.39208984375, + "eval_nll_loss": 0.37890625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 4.9375, + "eval_rewards/rejected": 6.09375, + "eval_runtime": 2.3282, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.3628742287518036, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6453125476837158, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -561.5999755859375, + "logps/rejected": -661.2000122070312, + "loss": 0.58045654296875, + "memory(GiB)": 46.14, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.899999618530273, + "rewards/margins": 6.381249904632568, + "rewards/rejected": 4.528124809265137, + "step": 25, + "train_speed(iter/s)": 0.146228 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7276190764282714, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.5343749523162842, + "logits/rejected": -1.7734375, + "logps/chosen": -405.70001220703125, + "logps/rejected": -592.7999877929688, + "loss": 0.5160888671875, + "memory(GiB)": 46.14, + "nll_loss": 0.515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.899999618530273, + "rewards/margins": 8.46875, + "rewards/rejected": 3.450000047683716, + "step": 30, + "train_speed(iter/s)": 0.146969 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.8125, + "eval_logps/chosen": -167.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.348876953125, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.375, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 3.84375, + "eval_runtime": 2.3448, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + 
"grad_norm": 0.8371169486087437, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.728124976158142, + "logits/rejected": -1.7218749523162842, + "logps/chosen": -619.2000122070312, + "logps/rejected": -638.4000244140625, + "loss": 0.568194580078125, + "memory(GiB)": 46.14, + "nll_loss": 0.5648437738418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.912500381469727, + "rewards/margins": 9.756250381469727, + "rewards/rejected": 4.153124809265137, + "step": 35, + "train_speed(iter/s)": 0.14542 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7334467774160591, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.662500023841858, + "logits/rejected": -1.6984374523162842, + "logps/chosen": -471.6000061035156, + "logps/rejected": -683.2000122070312, + "loss": 0.53173828125, + "memory(GiB)": 46.14, + "nll_loss": 0.586718738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.212499618530273, + "rewards/margins": 11.912500381469727, + "rewards/rejected": 2.2855467796325684, + "step": 40, + "train_speed(iter/s)": 0.146745 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -0.984375, + "eval_logits/rejected": -1.734375, + "eval_logps/chosen": -159.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.32861328125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.1875, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": -0.8515625, + "eval_runtime": 2.3307, + "eval_samples_per_second": 1.716, + "eval_steps_per_second": 0.429, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.546250666668433, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.5578124523162842, + "logits/rejected": -1.59375, + "logps/chosen": -442.3999938964844, + "logps/rejected": -720.4000244140625, + "loss": 0.44443359375, + "memory(GiB)": 47.59, + "nll_loss": 0.44414061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.1875, + "rewards/margins": 
15.899999618530273, + "rewards/rejected": -0.7007812261581421, + "step": 45, + "train_speed(iter/s)": 0.145269 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.44204175223844183, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.704687476158142, + "logps/chosen": -491.6000061035156, + "logps/rejected": -678.0, + "loss": 0.50015869140625, + "memory(GiB)": 47.59, + "nll_loss": 0.500781238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.3125, + "rewards/margins": 16.875, + "rewards/rejected": -0.5650390386581421, + "step": 50, + "train_speed(iter/s)": 0.146516 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.671875, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1128.0, + "eval_loss": 0.315185546875, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 15.0625, + "eval_rewards/rejected": -1.1484375, + "eval_runtime": 2.3162, + "eval_samples_per_second": 1.727, + "eval_steps_per_second": 0.432, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2875997592353248, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.4328124523162842, + "logits/rejected": -1.529687523841858, + "logps/chosen": -471.6000061035156, + "logps/rejected": -622.4000244140625, + "loss": 0.4770263671875, + "memory(GiB)": 47.59, + "nll_loss": 0.4765625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.575000762939453, + "rewards/margins": 16.112499237060547, + "rewards/rejected": 0.4677734375, + "step": 55, + "train_speed(iter/s)": 0.147535 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.21479272542751, + "learning_rate": 5e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.342187523841858, + "logps/chosen": -517.2000122070312, + "logps/rejected": -496.79998779296875, + "loss": 0.4359375, + "memory(GiB)": 47.59, + "nll_loss": 0.4359374940395355, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 16.912500381469727, + "rewards/margins": 16.274999618530273, + "rewards/rejected": 0.638476550579071, + "step": 60, + "train_speed(iter/s)": 0.148886 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.828125, + "eval_logits/rejected": -1.6328125, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": -0.546875, + "eval_runtime": 2.3283, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5495600856192168, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.5078125, + "logits/rejected": -1.4968750476837158, + "logps/chosen": -452.3999938964844, + "logps/rejected": -544.7999877929688, + "loss": 0.409503173828125, + "memory(GiB)": 47.59, + "nll_loss": 0.4097656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.22265625, + "step": 65, + "train_speed(iter/s)": 0.148787 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.3929300642395231, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.384374976158142, + "logits/rejected": -1.5164062976837158, + "logps/chosen": -402.79998779296875, + "logps/rejected": -641.2000122070312, + "loss": 0.4465576171875, + "memory(GiB)": 47.59, + "nll_loss": 0.44609373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.087499618530273, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.674121081829071, + "step": 70, + "train_speed(iter/s)": 0.149098 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.625, + "eval_logps/chosen": -150.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.310546875, + 
"eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0625, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.69921875, + "eval_runtime": 2.3424, + "eval_samples_per_second": 1.708, + "eval_steps_per_second": 0.427, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6458764930563424, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.3671875, + "logits/rejected": -1.3984375, + "logps/chosen": -510.0, + "logps/rejected": -680.4000244140625, + "loss": 0.4861083984375, + "memory(GiB)": 47.59, + "nll_loss": 0.486328125, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.462499618530273, + "rewards/margins": 16.649999618530273, + "rewards/rejected": 0.7554687261581421, + "step": 75, + "train_speed(iter/s)": 0.148719 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5294379022539796, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.404687523841858, + "logits/rejected": -1.404687523841858, + "logps/chosen": -432.79998779296875, + "logps/rejected": -620.7999877929688, + "loss": 0.46317138671875, + "memory(GiB)": 47.59, + "nll_loss": 0.47539061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.850000381469727, + "rewards/margins": 17.174999237060547, + "rewards/rejected": -0.3223632872104645, + "step": 80, + "train_speed(iter/s)": 0.147985 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 12.875, + "eval_rewards/rejected": 1.25, + "eval_runtime": 2.3227, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5098063945805259, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": 
-1.2625000476837158, + "logits/rejected": -1.5234375, + "logps/chosen": -390.79998779296875, + "logps/rejected": -747.2000122070312, + "loss": 0.4080902099609375, + "memory(GiB)": 47.59, + "nll_loss": 0.4078125059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.325000762939453, + "rewards/margins": 16.137500762939453, + "rewards/rejected": 1.209570288658142, + "step": 85, + "train_speed(iter/s)": 0.14806 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.2629503293334859, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.2296874523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -589.5999755859375, + "loss": 0.42548675537109376, + "memory(GiB)": 47.59, + "nll_loss": 0.42500001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.5, + "rewards/margins": 16.799999237060547, + "rewards/rejected": 1.658789038658142, + "step": 90, + "train_speed(iter/s)": 0.149448 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -0.70703125, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.0, + "eval_rewards/rejected": 1.1953125, + "eval_runtime": 2.3883, + "eval_samples_per_second": 1.675, + "eval_steps_per_second": 0.419, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.2528000224499017, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.3624999523162842, + "logits/rejected": -1.4609375, + "logps/chosen": -526.7999877929688, + "logps/rejected": -720.0, + "loss": 0.470208740234375, + "memory(GiB)": 47.59, + "nll_loss": 0.46953123807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 19.25, + "rewards/margins": 18.237499237060547, + "rewards/rejected": 1.026757836341858, + "step": 95, + "train_speed(iter/s)": 0.148517 + }, + { + "epoch": 
2.6315789473684212, + "grad_norm": 0.4249614464684851, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.396875023841858, + "logits/rejected": -1.5390625, + "logps/chosen": -406.20001220703125, + "logps/rejected": -563.2000122070312, + "loss": 0.4023651123046875, + "memory(GiB)": 56.84, + "nll_loss": 0.40253907442092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.762500762939453, + "rewards/margins": 17.0625, + "rewards/rejected": -0.31132811307907104, + "step": 100, + "train_speed(iter/s)": 0.14936 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 1.0, + "eval_runtime": 2.3404, + "eval_samples_per_second": 1.709, + "eval_steps_per_second": 0.427, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.5639157494773256, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.41796875, + "logits/rejected": -1.4953124523162842, + "logps/chosen": -432.54998779296875, + "logps/rejected": -568.7999877929688, + "loss": 0.41885986328125, + "memory(GiB)": 56.84, + "nll_loss": 0.41874998807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.112499237060547, + "rewards/margins": 16.299999237060547, + "rewards/rejected": 0.7904297113418579, + "step": 105, + "train_speed(iter/s)": 0.149009 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.4926501392943744, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.357812523841858, + "logits/rejected": -1.529687523841858, + "logps/chosen": -453.6000061035156, + "logps/rejected": -603.5999755859375, + "loss": 0.39718017578125, + "memory(GiB)": 56.84, + "nll_loss": 0.3970703184604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.862499237060547, + 
"rewards/margins": 17.375, + "rewards/rejected": 0.46240234375, + "step": 110, + "train_speed(iter/s)": 0.149381 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -0.6953125, + "eval_logits/rejected": -1.5859375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.3076171875, + "eval_nll_loss": 0.30859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.125, + "eval_rewards/rejected": 1.046875, + "eval_runtime": 2.3273, + "eval_samples_per_second": 1.719, + "eval_steps_per_second": 0.43, + "step": 110 + }, + { + "epoch": 3.0, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.5859375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.3076171875, + "eval_nll_loss": 0.30859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.1875, + "eval_rewards/rejected": 1.0, + "eval_runtime": 2.2876, + "eval_samples_per_second": 1.749, + "eval_steps_per_second": 0.437, + "step": 114 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 359754023305216.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d6eb2d3b8b7c4320d19ebec79bd57b1c49c6f84 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea3fd7d1178b3676ea45fdfde0e899a240c24d4d03e55eb09be073e4c532e11 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + 
+ +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f5ad0d738a546c5dfcb69e5366b5e203ca99083 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "o_proj", + "v_proj", + "q_proj", + "up_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7aad1184de528cd5f576026dc5368f805612dca0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a9120ab7c8f8a2886ee001a80e84665f18a2befd874711f4a07ee49d8debf1 +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..b3b67557396afa371bd4ae4e246e102ed2523a72 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..9d4dcb7729038414ce942ac9d12009b219f0b884 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323239d6d55fcbc3e59327bbb4d0a7668ada459552470386cd12449b837dd5ad +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcaebdd315257acf2ab30cb448b8af8614a2ca92 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc0277982a4c92b84e363d6534650e457e82fde36f535a3aae4b55647371e83 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9d7c6309b1e2c84c4fcf1cd6164845b4d4800d1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:84d51115506694088fe18fad0132f157809f723cc64403712b8224b62fda71f4 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad9e9589c94c78ce22d1b1069b51e0658b3cc36e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137fcdbfaabcfd5a4cd5f2cc2ffa9b60d525067db88ed889879c819196177571 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..58e43295138eccd893863a8f72f5b04d3e26e669 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7802bbf849b5e55cba956bde2616982aa46af26fcc1f2d4762458ac928875c8 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dda9bdba6157b4b2e26c020ba5b8425a6a045b9b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c3af9f3d1dc40299ef29bef7cf29707e95093a9321f460ce99129b8deb1f25 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1082850fed626dbee21cd200693ca794da77bec4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7682705d669c507a6aa3a5a5208f99a57a6e874cd3a1c20fe778f4a8b28c88 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a1a6dde7f53d6dac5194812d7ee041eb91fd2e4 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96895faf6c83c4560f720669c85865f7d5e72f8e858d00d1cdcd09d9259c6fc7 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..05e86de4a177fb97f0c143afaf9dcc4098b99be6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a132ec25e6c1d34867a625464c63159b473e9e36f82fb5ad09c21b0f0e2f0ac +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f9133a15655d28ff1e1f4efe91c31e7e23f8652 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d5c0caa1f5dc62ea83df7fabafe5a04998d7fc8bda5c0c096bc17d662d172e +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5daf083fe068e189b1f8baafddbec11464dc22e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3560ed019a7041d0b4499958fd90fdf8b65fb1b9ffd6586fd02b6dedf0fd3e6 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a512bf6fde687517d099706ebfc3772cf0dd0b7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd85a56f951ff2f07deefe86539ea952c880679b1c4e2f9ee7c3ed56420987d +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4c40d0dd137dc6a8b7a558a55ecba8ee4ead596d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e677785727f2607581fef5a18b4352b48abdf0cfd722ec2ce42ad93eee2a43 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..82f1cd849c85a6b1686d68e80c752956384ff336 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1543250bde591b5d07d479b057fc29110efdeaec2a01eb5197442e26369a83 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..060173f536423c04902395d83daad4cabb4c0d1c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:becacd733d0ad2583d7f13e1391b1f851461e96d21383c9b925451006db3e502 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe8dfdf82d58293ce827ea0289aa96663a6b6d89 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d35f02bb2d4604e9f127721a4072884330f4ef742c0a8fb81a39c810f2874b +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/latest new file mode 100644 index 0000000000000000000000000000000000000000..75eab498d0366633484ab40334e4b8fb92b16dad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/latest @@ -0,0 +1 @@ +global_step80 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b8b4067e4559b34f9b554c4963fe80d7f5fe839 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_0.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ba4c26c615bd5830d41566fab54dc69174be292761b34514b27fbe82b45b630b +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0265e51b5761ac9b323aa87ba00ba14b97e202 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c761d7f9b90c29c2d348a1133fd39be52c65e6bee4c2d179f6a6e564eb3a40 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5dd5aabcd6e7332f14a4796d6ec6c758e10aea0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccab847cc956e055fd3f9dcce06898826d065211e945b83576c8d487f87c5469 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcbdea3a573c2b7717f23e2ea0e4a6da6670d65d --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e5f1dbdcf6ec820c22fd1e4258fcd7af2a2bce65c480988d3f111aa574c9c06 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cdcb8d1710063a6c30dec635b4c44e3cb6cd24e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a23184c3e806d2649776427d1da2c0c9137f9b23a84468f3bdd5bbc75f696c9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e39323a662c284cd109b5ce8c39e8a0ce375f2c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382fc01b809542bf6f5e26742e3e19e80a1f189ac5de24cf8cd822e303916b83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..1ce685d2e57181f70debfb25eb90cb76ceaf47da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b178265c7d2ae07bff10b7312e5e49b9f5b4914c38969d2f64a6ca006296bca +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e363b8083cdd817e0b3a2e6fd1b65a905e189b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668825a859126c4cf32afb883895c91004130b6aee02178736ca2840e5429ad0 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaf96d6803aea265d756d902db3c4cc2386f9742 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90524bcdb94734ac7120e4205110f14662bff8cee00eed50355875dcdc538029 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a952fb22317aa6b706c73679e8ec5af348d23097 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/trainer_state.json @@ -0,0 +1,475 @@ +{ + "best_metric": 0.30712891, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80", + "epoch": 2.1052631578947367, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854056570832201, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 10.26, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.087961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 13.591956786084207, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.0830078125, + "memory(GiB)": 20.01, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.06103515625, + "rewards/margins": 0.0452880859375, + "rewards/rejected": 0.0156402587890625, + "step": 5, + "train_speed(iter/s)": 0.140484 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 7.5902824333777845, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5265624523162842, + "logits/rejected": -1.6375000476837158, + "logps/chosen": 
-621.5999755859375, + "logps/rejected": -703.2000122070312, + "loss": 2.04072265625, + "memory(GiB)": 46.14, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.015625, + "rewards/margins": 0.6595703363418579, + "rewards/rejected": 0.3578124940395355, + "step": 10, + "train_speed(iter/s)": 0.141561 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7734375, + "eval_nll_loss": 0.66015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.46875, + "eval_rewards/margins": 2.125, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.3433, + "eval_samples_per_second": 1.707, + "eval_steps_per_second": 0.427, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.131075980613862, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.720312476158142, + "logits/rejected": -1.673437476158142, + "logps/chosen": -642.7999877929688, + "logps/rejected": -580.0, + "loss": 1.05107421875, + "memory(GiB)": 46.14, + "nll_loss": 0.87109375, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.015625, + "rewards/margins": 2.4703125953674316, + "rewards/rejected": 1.545312523841858, + "step": 15, + "train_speed(iter/s)": 0.146121 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.673054658387307, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.7312500476837158, + "logps/chosen": -402.3999938964844, + "logps/rejected": -565.5999755859375, + "loss": 0.83876953125, + "memory(GiB)": 46.14, + "nll_loss": 0.811718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.206250190734863, + "rewards/margins": 4.456250190734863, + "rewards/rejected": 3.7593750953674316, + "step": 20, + "train_speed(iter/s)": 0.148181 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.875, + 
"eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -180.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.39208984375, + "eval_nll_loss": 0.37890625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 4.9375, + "eval_rewards/rejected": 6.09375, + "eval_runtime": 2.3282, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.3628742287518036, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6453125476837158, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -561.5999755859375, + "logps/rejected": -661.2000122070312, + "loss": 0.58045654296875, + "memory(GiB)": 46.14, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.899999618530273, + "rewards/margins": 6.381249904632568, + "rewards/rejected": 4.528124809265137, + "step": 25, + "train_speed(iter/s)": 0.146228 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7276190764282714, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.5343749523162842, + "logits/rejected": -1.7734375, + "logps/chosen": -405.70001220703125, + "logps/rejected": -592.7999877929688, + "loss": 0.5160888671875, + "memory(GiB)": 46.14, + "nll_loss": 0.515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.899999618530273, + "rewards/margins": 8.46875, + "rewards/rejected": 3.450000047683716, + "step": 30, + "train_speed(iter/s)": 0.146969 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.8125, + "eval_logps/chosen": -167.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.348876953125, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.375, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 3.84375, + "eval_runtime": 2.3448, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 30 + }, + { + "epoch": 
0.9210526315789473, + "grad_norm": 0.8371169486087437, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.728124976158142, + "logits/rejected": -1.7218749523162842, + "logps/chosen": -619.2000122070312, + "logps/rejected": -638.4000244140625, + "loss": 0.568194580078125, + "memory(GiB)": 46.14, + "nll_loss": 0.5648437738418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.912500381469727, + "rewards/margins": 9.756250381469727, + "rewards/rejected": 4.153124809265137, + "step": 35, + "train_speed(iter/s)": 0.14542 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7334467774160591, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.662500023841858, + "logits/rejected": -1.6984374523162842, + "logps/chosen": -471.6000061035156, + "logps/rejected": -683.2000122070312, + "loss": 0.53173828125, + "memory(GiB)": 46.14, + "nll_loss": 0.586718738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.212499618530273, + "rewards/margins": 11.912500381469727, + "rewards/rejected": 2.2855467796325684, + "step": 40, + "train_speed(iter/s)": 0.146745 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -0.984375, + "eval_logits/rejected": -1.734375, + "eval_logps/chosen": -159.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.32861328125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.1875, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": -0.8515625, + "eval_runtime": 2.3307, + "eval_samples_per_second": 1.716, + "eval_steps_per_second": 0.429, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.546250666668433, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.5578124523162842, + "logits/rejected": -1.59375, + "logps/chosen": -442.3999938964844, + "logps/rejected": -720.4000244140625, + "loss": 0.44443359375, + "memory(GiB)": 47.59, + "nll_loss": 0.44414061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.1875, + 
"rewards/margins": 15.899999618530273, + "rewards/rejected": -0.7007812261581421, + "step": 45, + "train_speed(iter/s)": 0.145269 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.44204175223844183, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.704687476158142, + "logps/chosen": -491.6000061035156, + "logps/rejected": -678.0, + "loss": 0.50015869140625, + "memory(GiB)": 47.59, + "nll_loss": 0.500781238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.3125, + "rewards/margins": 16.875, + "rewards/rejected": -0.5650390386581421, + "step": 50, + "train_speed(iter/s)": 0.146516 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.671875, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1128.0, + "eval_loss": 0.315185546875, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 15.0625, + "eval_rewards/rejected": -1.1484375, + "eval_runtime": 2.3162, + "eval_samples_per_second": 1.727, + "eval_steps_per_second": 0.432, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2875997592353248, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.4328124523162842, + "logits/rejected": -1.529687523841858, + "logps/chosen": -471.6000061035156, + "logps/rejected": -622.4000244140625, + "loss": 0.4770263671875, + "memory(GiB)": 47.59, + "nll_loss": 0.4765625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.575000762939453, + "rewards/margins": 16.112499237060547, + "rewards/rejected": 0.4677734375, + "step": 55, + "train_speed(iter/s)": 0.147535 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.21479272542751, + "learning_rate": 5e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.342187523841858, + "logps/chosen": -517.2000122070312, + "logps/rejected": -496.79998779296875, + "loss": 0.4359375, + "memory(GiB)": 47.59, + "nll_loss": 
0.4359374940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.912500381469727, + "rewards/margins": 16.274999618530273, + "rewards/rejected": 0.638476550579071, + "step": 60, + "train_speed(iter/s)": 0.148886 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.828125, + "eval_logits/rejected": -1.6328125, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": -0.546875, + "eval_runtime": 2.3283, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5495600856192168, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.5078125, + "logits/rejected": -1.4968750476837158, + "logps/chosen": -452.3999938964844, + "logps/rejected": -544.7999877929688, + "loss": 0.409503173828125, + "memory(GiB)": 47.59, + "nll_loss": 0.4097656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.22265625, + "step": 65, + "train_speed(iter/s)": 0.148787 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.3929300642395231, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.384374976158142, + "logits/rejected": -1.5164062976837158, + "logps/chosen": -402.79998779296875, + "logps/rejected": -641.2000122070312, + "loss": 0.4465576171875, + "memory(GiB)": 47.59, + "nll_loss": 0.44609373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.087499618530273, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.674121081829071, + "step": 70, + "train_speed(iter/s)": 0.149098 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.625, + "eval_logps/chosen": -150.0, + "eval_logps/rejected": -1104.0, + 
"eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0625, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.69921875, + "eval_runtime": 2.3424, + "eval_samples_per_second": 1.708, + "eval_steps_per_second": 0.427, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6458764930563424, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.3671875, + "logits/rejected": -1.3984375, + "logps/chosen": -510.0, + "logps/rejected": -680.4000244140625, + "loss": 0.4861083984375, + "memory(GiB)": 47.59, + "nll_loss": 0.486328125, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.462499618530273, + "rewards/margins": 16.649999618530273, + "rewards/rejected": 0.7554687261581421, + "step": 75, + "train_speed(iter/s)": 0.148719 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5294379022539796, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.404687523841858, + "logits/rejected": -1.404687523841858, + "logps/chosen": -432.79998779296875, + "logps/rejected": -620.7999877929688, + "loss": 0.46317138671875, + "memory(GiB)": 47.59, + "nll_loss": 0.47539061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.850000381469727, + "rewards/margins": 17.174999237060547, + "rewards/rejected": -0.3223632872104645, + "step": 80, + "train_speed(iter/s)": 0.147985 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 12.875, + "eval_rewards/rejected": 1.25, + "eval_runtime": 2.3227, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 
10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 254195476463616.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d6eb2d3b8b7c4320d19ebec79bd57b1c49c6f84 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea3fd7d1178b3676ea45fdfde0e899a240c24d4d03e55eb09be073e4c532e11 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-80/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. 
Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def 
get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + 
frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + 
+ +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f5ad0d738a546c5dfcb69e5366b5e203ca99083 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "o_proj", + "v_proj", + "q_proj", + "up_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0b63c3670ff9c699d450c13aa787f9f2deb4ed9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3bc430f500cb747c2055135c6fe1944b6425c4a23b98046a8b50a7fc41540cb +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..b3b67557396afa371bd4ae4e246e102ed2523a72 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..6cc13c885c7ff120d0e4ff2cc3782f5f0429fd50 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e45f1e53e71da0ff8fdf521539cbdcd86ebab7e5968f2c21712d1ebfe22740 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b805bb82311c2442cc25d5f54078c1ec3818cd69 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc62127088e7dfdf80b5f09605098df75073f317be31b78e01399058994c49eb +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed4b4b8203469fd12617a0ccf7e620e009930b90 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e26735ad19a04d189ff43abe0497e213655786bd0135c327e4524ae8ecea2467 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e626cefcda650ab2607623a7fd34959b02d0695 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926aab5a1941b700ae8be528cf4cbbfff18ab433e8ff4bff2bd39561ebc39000 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..60c3f98dee6d19806b697d446c396ace4740adc7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9fea70e9630c4130a74a560c6c202e256629e37275e1d89943ac9a342e1590 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a132631dcf8a0beff4a6f01be99730551aeafc0c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9340ce1d9846c58fd6241085f751f5f71ba5ef45e57eb7337b9bf6bf5979b96d +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de5ec915012bcbfd6f70005a1e0d220c9660d886 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:152872ecb227e3cf530f93a8366d3d3e071a8307238702b07e35743ba2aeeec0 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..44ee4c650568e8c53f1862ec64c030a420134d70 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:316b08acd70ee7aa12623229db006d902b5efa70e554d3d8de53ffd73a7e18ea +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..902240eb87a78d637ac373831cba6ecc56153534 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb13080749a65b63ac65449f8a3f8a01f562a1f39f2452c48c7f588bd7eea0b5 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a2b9a1a41ac2c8e9aaf22b9aee01cbd96316793 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77bf7c00e4e8f27e1386a6f463f82e22dacae418773442295c7a6150bc534669 +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fbefb263b3b25ecf78e50e78f6b0447edc6cf2b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d6bb166980243cd44320fdc36511999b08dd074ec072b15074e0abbbb346c73 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3db4911aec6a0ff6d91acb611346193573b4a94 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aacb2c7e389a7616caa21409777bd17a902bc249d0502d51b0bd0e09cf068b20 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b4ad54f556a4fbf8a5343c33efc1c6e61edc1c3f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a10f63355732c06e45b15116f04748b05efd74f13d532c707ef1fae6235e8e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3c4f7c7524f85b80de1cb6e8214b913ea994af6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215c98b528870000bd6c383dac4fb22ef4a0957ec7e406109e6ba1600dc8a7d8 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b68ee8493c80a9bb16791efbc63e05a5451e05ed --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f66c8cbab676566ba8762a4597e92092a9ad8f768c647508c0838e5a2575003b +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8582ec2511fec325a8fc968e30b3c7857cc4e428 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d075470e2153866bfc1cc14323d4495949224f9c9651b5a756c88018ae56a3e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/latest new file mode 100644 index 0000000000000000000000000000000000000000..8e7a337e2cb23bf07023d223dd647df2d25f0fc1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/latest @@ -0,0 +1 @@ +global_step90 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e31a2394e12bf431ae13288c3d90fe4727f07fa7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_0.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1db0a0f44aa3ac1d82c3bf8dc2d8968eeba4ce7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..75de18f57a056bd6a5f89df1abd045678f3f919e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd100693bc9f3267d044ce4a16e702502dc03ec --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5aeeabfe119f1cb0c8c804f1b9a4d3049f478d69 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..91fe0f42382ab06f4d26d753745a914c9e46100e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..5830ca6bd04645962b6e56a00a91cd8349ca449c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..343d1c0475f0dc64100dc67b09195e047f1a7bcf --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a39c9cfeaa2d69cb5a66e83272eee65ddffaed5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b768777109679597db5d1fa24a743962bede33623e22702b13b95eab2d42cb8 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2fd6111716b662f867b5b1111f511c70a4a82361 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/trainer_state.json @@ -0,0 +1,528 @@ +{ + "best_metric": 0.30639648, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90", + "epoch": 2.3684210526315788, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854056570832201, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 10.26, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.087961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 13.591956786084207, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.0830078125, + "memory(GiB)": 20.01, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.06103515625, + "rewards/margins": 0.0452880859375, + "rewards/rejected": 0.0156402587890625, + "step": 5, + "train_speed(iter/s)": 0.140484 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 7.5902824333777845, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5265624523162842, + "logits/rejected": -1.6375000476837158, + "logps/chosen": 
-621.5999755859375, + "logps/rejected": -703.2000122070312, + "loss": 2.04072265625, + "memory(GiB)": 46.14, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 1.015625, + "rewards/margins": 0.6595703363418579, + "rewards/rejected": 0.3578124940395355, + "step": 10, + "train_speed(iter/s)": 0.141561 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7734375, + "eval_nll_loss": 0.66015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.46875, + "eval_rewards/margins": 2.125, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.3433, + "eval_samples_per_second": 1.707, + "eval_steps_per_second": 0.427, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.131075980613862, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.720312476158142, + "logits/rejected": -1.673437476158142, + "logps/chosen": -642.7999877929688, + "logps/rejected": -580.0, + "loss": 1.05107421875, + "memory(GiB)": 46.14, + "nll_loss": 0.87109375, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 4.015625, + "rewards/margins": 2.4703125953674316, + "rewards/rejected": 1.545312523841858, + "step": 15, + "train_speed(iter/s)": 0.146121 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.673054658387307, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.7312500476837158, + "logps/chosen": -402.3999938964844, + "logps/rejected": -565.5999755859375, + "loss": 0.83876953125, + "memory(GiB)": 46.14, + "nll_loss": 0.811718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.206250190734863, + "rewards/margins": 4.456250190734863, + "rewards/rejected": 3.7593750953674316, + "step": 20, + "train_speed(iter/s)": 0.148181 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.875, + 
"eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -180.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.39208984375, + "eval_nll_loss": 0.37890625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 4.9375, + "eval_rewards/rejected": 6.09375, + "eval_runtime": 2.3282, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.3628742287518036, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6453125476837158, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -561.5999755859375, + "logps/rejected": -661.2000122070312, + "loss": 0.58045654296875, + "memory(GiB)": 46.14, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.899999618530273, + "rewards/margins": 6.381249904632568, + "rewards/rejected": 4.528124809265137, + "step": 25, + "train_speed(iter/s)": 0.146228 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7276190764282714, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.5343749523162842, + "logits/rejected": -1.7734375, + "logps/chosen": -405.70001220703125, + "logps/rejected": -592.7999877929688, + "loss": 0.5160888671875, + "memory(GiB)": 46.14, + "nll_loss": 0.515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.899999618530273, + "rewards/margins": 8.46875, + "rewards/rejected": 3.450000047683716, + "step": 30, + "train_speed(iter/s)": 0.146969 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.8125, + "eval_logps/chosen": -167.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.348876953125, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.375, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 3.84375, + "eval_runtime": 2.3448, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 30 + }, + { + "epoch": 
0.9210526315789473, + "grad_norm": 0.8371169486087437, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.728124976158142, + "logits/rejected": -1.7218749523162842, + "logps/chosen": -619.2000122070312, + "logps/rejected": -638.4000244140625, + "loss": 0.568194580078125, + "memory(GiB)": 46.14, + "nll_loss": 0.5648437738418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.912500381469727, + "rewards/margins": 9.756250381469727, + "rewards/rejected": 4.153124809265137, + "step": 35, + "train_speed(iter/s)": 0.14542 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7334467774160591, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.662500023841858, + "logits/rejected": -1.6984374523162842, + "logps/chosen": -471.6000061035156, + "logps/rejected": -683.2000122070312, + "loss": 0.53173828125, + "memory(GiB)": 46.14, + "nll_loss": 0.586718738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.212499618530273, + "rewards/margins": 11.912500381469727, + "rewards/rejected": 2.2855467796325684, + "step": 40, + "train_speed(iter/s)": 0.146745 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -0.984375, + "eval_logits/rejected": -1.734375, + "eval_logps/chosen": -159.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.32861328125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.1875, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": -0.8515625, + "eval_runtime": 2.3307, + "eval_samples_per_second": 1.716, + "eval_steps_per_second": 0.429, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.546250666668433, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.5578124523162842, + "logits/rejected": -1.59375, + "logps/chosen": -442.3999938964844, + "logps/rejected": -720.4000244140625, + "loss": 0.44443359375, + "memory(GiB)": 47.59, + "nll_loss": 0.44414061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.1875, + 
"rewards/margins": 15.899999618530273, + "rewards/rejected": -0.7007812261581421, + "step": 45, + "train_speed(iter/s)": 0.145269 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.44204175223844183, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.704687476158142, + "logps/chosen": -491.6000061035156, + "logps/rejected": -678.0, + "loss": 0.50015869140625, + "memory(GiB)": 47.59, + "nll_loss": 0.500781238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.3125, + "rewards/margins": 16.875, + "rewards/rejected": -0.5650390386581421, + "step": 50, + "train_speed(iter/s)": 0.146516 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.671875, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1128.0, + "eval_loss": 0.315185546875, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 15.0625, + "eval_rewards/rejected": -1.1484375, + "eval_runtime": 2.3162, + "eval_samples_per_second": 1.727, + "eval_steps_per_second": 0.432, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2875997592353248, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.4328124523162842, + "logits/rejected": -1.529687523841858, + "logps/chosen": -471.6000061035156, + "logps/rejected": -622.4000244140625, + "loss": 0.4770263671875, + "memory(GiB)": 47.59, + "nll_loss": 0.4765625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.575000762939453, + "rewards/margins": 16.112499237060547, + "rewards/rejected": 0.4677734375, + "step": 55, + "train_speed(iter/s)": 0.147535 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.21479272542751, + "learning_rate": 5e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.342187523841858, + "logps/chosen": -517.2000122070312, + "logps/rejected": -496.79998779296875, + "loss": 0.4359375, + "memory(GiB)": 47.59, + "nll_loss": 
0.4359374940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.912500381469727, + "rewards/margins": 16.274999618530273, + "rewards/rejected": 0.638476550579071, + "step": 60, + "train_speed(iter/s)": 0.148886 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.828125, + "eval_logits/rejected": -1.6328125, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": -0.546875, + "eval_runtime": 2.3283, + "eval_samples_per_second": 1.718, + "eval_steps_per_second": 0.43, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5495600856192168, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.5078125, + "logits/rejected": -1.4968750476837158, + "logps/chosen": -452.3999938964844, + "logps/rejected": -544.7999877929688, + "loss": 0.409503173828125, + "memory(GiB)": 47.59, + "nll_loss": 0.4097656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.22265625, + "step": 65, + "train_speed(iter/s)": 0.148787 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.3929300642395231, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.384374976158142, + "logits/rejected": -1.5164062976837158, + "logps/chosen": -402.79998779296875, + "logps/rejected": -641.2000122070312, + "loss": 0.4465576171875, + "memory(GiB)": 47.59, + "nll_loss": 0.44609373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.087499618530273, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.674121081829071, + "step": 70, + "train_speed(iter/s)": 0.149098 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.625, + "eval_logps/chosen": -150.0, + "eval_logps/rejected": -1104.0, + 
"eval_loss": 0.310546875, + "eval_nll_loss": 0.310546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.0625, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.69921875, + "eval_runtime": 2.3424, + "eval_samples_per_second": 1.708, + "eval_steps_per_second": 0.427, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6458764930563424, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.3671875, + "logits/rejected": -1.3984375, + "logps/chosen": -510.0, + "logps/rejected": -680.4000244140625, + "loss": 0.4861083984375, + "memory(GiB)": 47.59, + "nll_loss": 0.486328125, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.462499618530273, + "rewards/margins": 16.649999618530273, + "rewards/rejected": 0.7554687261581421, + "step": 75, + "train_speed(iter/s)": 0.148719 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5294379022539796, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.404687523841858, + "logits/rejected": -1.404687523841858, + "logps/chosen": -432.79998779296875, + "logps/rejected": -620.7999877929688, + "loss": 0.46317138671875, + "memory(GiB)": 47.59, + "nll_loss": 0.47539061307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.850000381469727, + "rewards/margins": 17.174999237060547, + "rewards/rejected": -0.3223632872104645, + "step": 80, + "train_speed(iter/s)": 0.147985 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.703125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 12.875, + "eval_rewards/rejected": 1.25, + "eval_runtime": 2.3227, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5098063945805259, + "learning_rate": 1.6760206719303105e-05, + 
"logits/chosen": -1.2625000476837158, + "logits/rejected": -1.5234375, + "logps/chosen": -390.79998779296875, + "logps/rejected": -747.2000122070312, + "loss": 0.4080902099609375, + "memory(GiB)": 47.59, + "nll_loss": 0.4078125059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.325000762939453, + "rewards/margins": 16.137500762939453, + "rewards/rejected": 1.209570288658142, + "step": 85, + "train_speed(iter/s)": 0.14806 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.2629503293334859, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.2296874523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -589.5999755859375, + "loss": 0.42548675537109376, + "memory(GiB)": 47.59, + "nll_loss": 0.42500001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.5, + "rewards/margins": 16.799999237060547, + "rewards/rejected": 1.658789038658142, + "step": 90, + "train_speed(iter/s)": 0.149448 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -0.70703125, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.0, + "eval_rewards/rejected": 1.1953125, + "eval_runtime": 2.3883, + "eval_samples_per_second": 1.675, + "eval_steps_per_second": 0.419, + "step": 90 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 283981896744960.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d6eb2d3b8b7c4320d19ebec79bd57b1c49c6f84 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea3fd7d1178b3676ea45fdfde0e899a240c24d4d03e55eb09be073e4c532e11 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..74492cbcfbbc51d14ceba04bcca32ae299320e58 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..3358374e115517666b743476405f95e9c35b7cca Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..dcf1119648b6966d9325b95ed457d7f69e06d1d4 Binary files 
/dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..557188dc02026ea8290b5da8d34cff541c660b7d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..709b5cb35d42cfc34f5fd1cc4b4b84f51aaf34f8 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..7a3cf69c816a45a47270b8d4f457579b4576cc6b Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_accuracies.png 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..2d2698dd0f6f8ad18db2b94a0c23ae94df643a78 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..11dd8fa6f6bdce696777b06f0f25d3794aa045ac Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..09d9c4a08e2e6a0fa1c1970b76bfa0fa5ec9fe14 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..5d3e70861d3f46d436616efc70c83ec1266e3d8d Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_runtime.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..ad0876460c0efe1c67c73c50135ca128ffe79c1a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..64060c5ee792f04cb4406d95e18f22c7c569643a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..bc7ab262ba10390e278b84cffe86999d8c531a72 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/eval_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_epoch.png 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_epoch.png new file mode 100644 index 0000000000000000000000000000000000000000..c152c9bbb1100d680125e899d3d0da3db98dc221 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_epoch.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_grad_norm.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_grad_norm.png new file mode 100644 index 0000000000000000000000000000000000000000..74e2a897e442bb836a146dfa28ce0bcd4de804bd Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_grad_norm.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_learning_rate.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..e35e42c173f451f9666ba448872c615528441c9e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_learning_rate.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..afa4e967ffe2a1c2fcc486e7cf318c5f01a3a618 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..de6b43f33470961d128e9a7cf71c4659e37aec22 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..bada165cd5e6f07890887fa6a398a6c788983fca Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..c44763bcc59a2cf4e6842198777f3ce9591d22a2 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_loss.png 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..fdd449e1cebbd5bb769aec311821568b8ba28603 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_memory(GiB).png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_memory(GiB).png new file mode 100644 index 0000000000000000000000000000000000000000..8fa9ca11a4aaa41c84c01e64cff554d26678c8fd Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_memory(GiB).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..23894be8ac5587cc68b12ef2adc4304475b2c25a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..e6db458876f551e592d07fb889087c14d2a673a5 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..6d63022949b12a6732e43cc24ac1c5f9ec176e4a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..ea628407b13769ad072b2efd7aff644296e8926b Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..bd9697b25667aa3683c415ebb891ffbfecfd175e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_total_flos.png 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_total_flos.png new file mode 100644 index 0000000000000000000000000000000000000000..c5fecc3a65f915cc9bfb03a3da5259e0afccbfa4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_total_flos.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..32be31f8084586c1c2a0b3fa994e0851b3bf5250 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_runtime.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..d2c2d1e43c389094712a03340ff2f58c765f9498 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..fa370c8b4a85eb32d41721dfd39c8154cfe28194 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_speed(iter_s).png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_speed(iter_s).png new file mode 100644 index 0000000000000000000000000000000000000000..52df876b089090df4fb54930d98bcbad71898d53 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_speed(iter_s).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..af6310b322c918247b5989331bfd4a714a7bce6e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/images/train_train_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/logging.jsonl b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/logging.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..91264153298d487a81647ae39549bd0464d591b3 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/logging.jsonl @@ -0,0 +1,37 @@ +{"loss": 1.33789062, "grad_norm": 9.85405657, "learning_rate": 1.667e-05, "memory(GiB)": 10.26, "train_speed(iter/s)": 0.087961, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, 
"rewards/margins": 0.0, "logps/chosen": -704.0, "logps/rejected": -416.0, "logits/chosen": -2.15625, "logits/rejected": -1.40625, "nll_loss": 0.6484375, "epoch": 0.02631579, "global_step/max_steps": "1/114", "percentage": "0.88%", "elapsed_time": "7s", "remaining_time": "14m 29s"} +{"loss": 2.08300781, "grad_norm": 13.59195679, "learning_rate": 8.333e-05, "memory(GiB)": 20.01, "train_speed(iter/s)": 0.140484, "rewards/chosen": 0.06103516, "rewards/rejected": 0.01564026, "rewards/accuracies": 0.3125, "rewards/margins": 0.04528809, "logps/chosen": -721.0, "logps/rejected": -575.5, "logits/chosen": -1.59570312, "logits/rejected": -1.57617188, "nll_loss": 1.41308594, "epoch": 0.13157895, "global_step/max_steps": "5/114", "percentage": "4.39%", "elapsed_time": "31s", "remaining_time": "11m 35s"} +{"loss": 2.04072266, "grad_norm": 7.59028243, "learning_rate": 9.966e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.141561, "rewards/chosen": 1.015625, "rewards/rejected": 0.35781249, "rewards/accuracies": 0.77499998, "rewards/margins": 0.65957034, "logps/chosen": -621.59997559, "logps/rejected": -703.20001221, "logits/chosen": -1.52656245, "logits/rejected": -1.63750005, "nll_loss": 1.55156255, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "1m 6s", "remaining_time": "11m 36s"} +{"eval_loss": 0.7734375, "eval_runtime": 2.3433, "eval_samples_per_second": 1.707, "eval_steps_per_second": 0.427, "eval_rewards/chosen": 3.46875, "eval_rewards/rejected": 1.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 2.125, "eval_logps/chosen": -256.0, "eval_logps/rejected": -1096.0, "eval_logits/chosen": -0.9375, "eval_logits/rejected": -1.828125, "eval_nll_loss": 0.66015625, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "1m 9s", "remaining_time": "12m 0s"} +{"loss": 1.05107422, "grad_norm": 2.13107598, "learning_rate": 9.83e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 
0.146121, "rewards/chosen": 4.015625, "rewards/rejected": 1.54531252, "rewards/accuracies": 0.94999999, "rewards/margins": 2.4703126, "logps/chosen": -642.79998779, "logps/rejected": -580.0, "logits/chosen": -1.72031248, "logits/rejected": -1.67343748, "nll_loss": 0.87109375, "epoch": 0.39473684, "global_step/max_steps": "15/114", "percentage": "13.16%", "elapsed_time": "1m 38s", "remaining_time": "10m 53s"} +{"loss": 0.83876953, "grad_norm": 1.67305466, "learning_rate": 9.591e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.148181, "rewards/chosen": 8.20625019, "rewards/rejected": 3.7593751, "rewards/accuracies": 1.0, "rewards/margins": 4.45625019, "logps/chosen": -402.3999939, "logps/rejected": -565.59997559, "logits/chosen": -1.64999998, "logits/rejected": -1.73125005, "nll_loss": 0.81171876, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "2m 11s", "remaining_time": "10m 17s"} +{"eval_loss": 0.39208984, "eval_runtime": 2.3282, "eval_samples_per_second": 1.718, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 11.0625, "eval_rewards/rejected": 6.09375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 4.9375, "eval_logps/chosen": -180.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -0.875, "eval_logits/rejected": -1.8203125, "eval_nll_loss": 0.37890625, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "2m 13s", "remaining_time": "10m 28s"} +{"loss": 0.58045654, "grad_norm": 1.36287423, "learning_rate": 9.256e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.146228, "rewards/chosen": 10.89999962, "rewards/rejected": 4.52812481, "rewards/accuracies": 1.0, "rewards/margins": 6.3812499, "logps/chosen": -561.59997559, "logps/rejected": -661.20001221, "logits/chosen": -1.64531255, "logits/rejected": -1.63750005, "nll_loss": 0.57109374, "epoch": 0.65789474, "global_step/max_steps": "25/114", "percentage": "21.93%", "elapsed_time": "2m 47s", 
"remaining_time": "9m 55s"} +{"loss": 0.51608887, "grad_norm": 0.72761908, "learning_rate": 8.83e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.146969, "rewards/chosen": 11.89999962, "rewards/rejected": 3.45000005, "rewards/accuracies": 1.0, "rewards/margins": 8.46875, "logps/chosen": -405.70001221, "logps/rejected": -592.79998779, "logits/chosen": -1.53437495, "logits/rejected": -1.7734375, "nll_loss": 0.515625, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "3m 20s", "remaining_time": "9m 21s"} +{"eval_loss": 0.34887695, "eval_runtime": 2.3448, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.426, "eval_rewards/chosen": 12.375, "eval_rewards/rejected": 3.84375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.5, "eval_logps/chosen": -167.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.0078125, "eval_logits/rejected": -1.8125, "eval_nll_loss": 0.34765625, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "3m 22s", "remaining_time": "9m 27s"} +{"loss": 0.56819458, "grad_norm": 0.83711695, "learning_rate": 8.324e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.14542, "rewards/chosen": 13.91250038, "rewards/rejected": 4.15312481, "rewards/accuracies": 1.0, "rewards/margins": 9.75625038, "logps/chosen": -619.20001221, "logps/rejected": -638.40002441, "logits/chosen": -1.72812498, "logits/rejected": -1.72187495, "nll_loss": 0.56484377, "epoch": 0.92105263, "global_step/max_steps": "35/114", "percentage": "30.70%", "elapsed_time": "3m 57s", "remaining_time": "8m 54s"} +{"loss": 0.53173828, "grad_norm": 0.73344678, "learning_rate": 7.748e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.146745, "rewards/chosen": 14.21249962, "rewards/rejected": 2.28554678, "rewards/accuracies": 1.0, "rewards/margins": 11.91250038, "logps/chosen": -471.6000061, "logps/rejected": -683.20001221, "logits/chosen": -1.66250002, "logits/rejected": 
-1.69843745, "nll_loss": 0.58671874, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "4m 28s", "remaining_time": "8m 17s"} +{"eval_loss": 0.32861328, "eval_runtime": 2.3307, "eval_samples_per_second": 1.716, "eval_steps_per_second": 0.429, "eval_rewards/chosen": 13.1875, "eval_rewards/rejected": -0.8515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.0625, "eval_logps/chosen": -159.0, "eval_logps/rejected": -1120.0, "eval_logits/chosen": -0.984375, "eval_logits/rejected": -1.734375, "eval_nll_loss": 0.328125, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "4m 31s", "remaining_time": "8m 21s"} +{"loss": 0.44443359, "grad_norm": 0.54625067, "learning_rate": 7.113e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.145269, "rewards/chosen": 15.1875, "rewards/rejected": -0.70078123, "rewards/accuracies": 1.0, "rewards/margins": 15.89999962, "logps/chosen": -442.3999939, "logps/rejected": -720.40002441, "logits/chosen": -1.55781245, "logits/rejected": -1.59375, "nll_loss": 0.44414061, "epoch": 1.18421053, "global_step/max_steps": "45/114", "percentage": "39.47%", "elapsed_time": "5m 6s", "remaining_time": "7m 49s"} +{"loss": 0.50015869, "grad_norm": 0.44204175, "learning_rate": 6.434e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.146516, "rewards/chosen": 16.3125, "rewards/rejected": -0.56503904, "rewards/accuracies": 1.0, "rewards/margins": 16.875, "logps/chosen": -491.6000061, "logps/rejected": -678.0, "logits/chosen": -1.578125, "logits/rejected": -1.70468748, "nll_loss": 0.50078124, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "5m 37s", "remaining_time": "7m 12s"} +{"eval_loss": 0.31518555, "eval_runtime": 2.3162, "eval_samples_per_second": 1.727, "eval_steps_per_second": 0.432, "eval_rewards/chosen": 13.875, "eval_rewards/rejected": -1.1484375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 
15.0625, "eval_logps/chosen": -152.0, "eval_logps/rejected": -1128.0, "eval_logits/chosen": -1.0, "eval_logits/rejected": -1.671875, "eval_nll_loss": 0.31445312, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "5m 39s", "remaining_time": "7m 15s"} +{"loss": 0.47702637, "grad_norm": 0.28759976, "learning_rate": 5.725e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.147535, "rewards/chosen": 16.57500076, "rewards/rejected": 0.46777344, "rewards/accuracies": 1.0, "rewards/margins": 16.11249924, "logps/chosen": -471.6000061, "logps/rejected": -622.40002441, "logits/chosen": -1.43281245, "logits/rejected": -1.52968752, "nll_loss": 0.4765625, "epoch": 1.44736842, "global_step/max_steps": "55/114", "percentage": "48.25%", "elapsed_time": "6m 9s", "remaining_time": "6m 35s"} +{"loss": 0.4359375, "grad_norm": 0.21479273, "learning_rate": 5e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.148886, "rewards/chosen": 16.91250038, "rewards/rejected": 0.63847655, "rewards/accuracies": 1.0, "rewards/margins": 16.27499962, "logps/chosen": -517.20001221, "logps/rejected": -496.79998779, "logits/chosen": -1.57968748, "logits/rejected": -1.34218752, "nll_loss": 0.43593749, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "6m 39s", "remaining_time": "5m 59s"} +{"eval_loss": 0.31054688, "eval_runtime": 2.3283, "eval_samples_per_second": 1.718, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 14.0, "eval_rewards/rejected": -0.546875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -151.0, "eval_logps/rejected": -1120.0, "eval_logits/chosen": -0.828125, "eval_logits/rejected": -1.6328125, "eval_nll_loss": 0.31054688, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "6m 41s", "remaining_time": "6m 1s"} +{"loss": 0.40950317, "grad_norm": 0.54956009, "learning_rate": 4.275e-05, "memory(GiB)": 47.59, 
"train_speed(iter/s)": 0.148787, "rewards/chosen": 16.98749924, "rewards/rejected": 0.22265625, "rewards/accuracies": 1.0, "rewards/margins": 16.76250076, "logps/chosen": -452.3999939, "logps/rejected": -544.79998779, "logits/chosen": -1.5078125, "logits/rejected": -1.49687505, "nll_loss": 0.40976563, "epoch": 1.71052632, "global_step/max_steps": "65/114", "percentage": "57.02%", "elapsed_time": "7m 13s", "remaining_time": "5m 26s"} +{"loss": 0.44655762, "grad_norm": 0.39293006, "learning_rate": 3.566e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.149098, "rewards/chosen": 17.08749962, "rewards/rejected": 0.67412108, "rewards/accuracies": 1.0, "rewards/margins": 16.42499924, "logps/chosen": -402.79998779, "logps/rejected": -641.20001221, "logits/chosen": -1.38437498, "logits/rejected": -1.5164063, "nll_loss": 0.44609374, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "7m 45s", "remaining_time": "4m 52s"} +{"eval_loss": 0.31054688, "eval_runtime": 2.3424, "eval_samples_per_second": 1.708, "eval_steps_per_second": 0.427, "eval_rewards/chosen": 14.0625, "eval_rewards/rejected": 0.69921875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -150.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.625, "eval_nll_loss": 0.31054688, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "7m 48s", "remaining_time": "4m 54s"} +{"loss": 0.4861084, "grad_norm": 0.64587649, "learning_rate": 2.887e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.148719, "rewards/chosen": 17.46249962, "rewards/rejected": 0.75546873, "rewards/accuracies": 1.0, "rewards/margins": 16.64999962, "logps/chosen": -510.0, "logps/rejected": -680.40002441, "logits/chosen": -1.3671875, "logits/rejected": -1.3984375, "nll_loss": 0.48632812, "epoch": 1.97368421, "global_step/max_steps": "75/114", "percentage": "65.79%", "elapsed_time": 
"8m 20s", "remaining_time": "4m 20s"} +{"loss": 0.46317139, "grad_norm": 0.5294379, "learning_rate": 2.252e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.147985, "rewards/chosen": 16.85000038, "rewards/rejected": -0.32236329, "rewards/accuracies": 1.0, "rewards/margins": 17.17499924, "logps/chosen": -432.79998779, "logps/rejected": -620.79998779, "logits/chosen": -1.40468752, "logits/rejected": -1.40468752, "nll_loss": 0.47539061, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "8m 56s", "remaining_time": "3m 48s"} +{"eval_loss": 0.30712891, "eval_runtime": 2.3227, "eval_samples_per_second": 1.722, "eval_steps_per_second": 0.431, "eval_rewards/chosen": 14.125, "eval_rewards/rejected": 1.25, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 12.875, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.6015625, "eval_nll_loss": 0.30664062, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "8m 59s", "remaining_time": "3m 49s"} +{"loss": 0.40809021, "grad_norm": 0.50980639, "learning_rate": 1.676e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.14806, "rewards/chosen": 17.32500076, "rewards/rejected": 1.20957029, "rewards/accuracies": 1.0, "rewards/margins": 16.13750076, "logps/chosen": -390.79998779, "logps/rejected": -747.20001221, "logits/chosen": -1.26250005, "logits/rejected": -1.5234375, "nll_loss": 0.40781251, "epoch": 2.23684211, "global_step/max_steps": "85/114", "percentage": "74.56%", "elapsed_time": "9m 30s", "remaining_time": "3m 14s"} +{"loss": 0.42548676, "grad_norm": 0.26295033, "learning_rate": 1.17e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.149448, "rewards/chosen": 18.5, "rewards/rejected": 1.65878904, "rewards/accuracies": 1.0, "rewards/margins": 16.79999924, "logps/chosen": -483.20001221, "logps/rejected": -589.59997559, "logits/chosen": -1.44375002, 
"logits/rejected": -1.22968745, "nll_loss": 0.42500001, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "9m 58s", "remaining_time": "2m 39s"} +{"eval_loss": 0.30639648, "eval_runtime": 2.3883, "eval_samples_per_second": 1.675, "eval_steps_per_second": 0.419, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 1.1953125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.0, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.70703125, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.30664062, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "10m 0s", "remaining_time": "2m 40s"} +{"loss": 0.47020874, "grad_norm": 0.25280002, "learning_rate": 7.44e-06, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.148517, "rewards/chosen": 19.25, "rewards/rejected": 1.02675784, "rewards/accuracies": 1.0, "rewards/margins": 18.23749924, "logps/chosen": -526.79998779, "logps/rejected": -720.0, "logits/chosen": -1.36249995, "logits/rejected": -1.4609375, "nll_loss": 0.46953124, "epoch": 2.5, "global_step/max_steps": "95/114", "percentage": "83.33%", "elapsed_time": "10m 35s", "remaining_time": "2m 7s"} +{"loss": 0.40236511, "grad_norm": 0.42496145, "learning_rate": 4.09e-06, "memory(GiB)": 56.84, "train_speed(iter/s)": 0.14936, "rewards/chosen": 16.76250076, "rewards/rejected": -0.31132811, "rewards/accuracies": 1.0, "rewards/margins": 17.0625, "logps/chosen": -406.20001221, "logps/rejected": -563.20001221, "logits/chosen": -1.39687502, "logits/rejected": -1.5390625, "nll_loss": 0.40253907, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "11m 5s", "remaining_time": "1m 33s"} +{"eval_loss": 0.30639648, "eval_runtime": 2.3404, "eval_samples_per_second": 1.709, "eval_steps_per_second": 0.427, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, 
"eval_rewards/margins": 13.25, "eval_logps/chosen": -148.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.30664062, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "11m 8s", "remaining_time": "1m 33s"} +{"loss": 0.41885986, "grad_norm": 0.56391575, "learning_rate": 1.7e-06, "memory(GiB)": 56.84, "train_speed(iter/s)": 0.149009, "rewards/chosen": 17.11249924, "rewards/rejected": 0.79042971, "rewards/accuracies": 1.0, "rewards/margins": 16.29999924, "logps/chosen": -432.54998779, "logps/rejected": -568.79998779, "logits/chosen": -1.41796875, "logits/rejected": -1.49531245, "nll_loss": 0.41874999, "epoch": 2.76315789, "global_step/max_steps": "105/114", "percentage": "92.11%", "elapsed_time": "11m 40s", "remaining_time": "1m 0s"} +{"loss": 0.39718018, "grad_norm": 0.49265014, "learning_rate": 3.4e-07, "memory(GiB)": 56.84, "train_speed(iter/s)": 0.149381, "rewards/chosen": 17.86249924, "rewards/rejected": 0.46240234, "rewards/accuracies": 1.0, "rewards/margins": 17.375, "logps/chosen": -453.6000061, "logps/rejected": -603.59997559, "logits/chosen": -1.35781252, "logits/rejected": -1.52968752, "nll_loss": 0.39707032, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "12m 12s", "remaining_time": "26s"} +{"eval_loss": 0.30761719, "eval_runtime": 2.3273, "eval_samples_per_second": 1.719, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 1.046875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.125, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6953125, "eval_logits/rejected": -1.5859375, "eval_nll_loss": 0.30859375, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "12m 15s", "remaining_time": "26s"} +{"eval_loss": 0.30761719, "eval_runtime": 2.2876, 
"eval_samples_per_second": 1.749, "eval_steps_per_second": 0.437, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.1875, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.5859375, "eval_nll_loss": 0.30859375, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "12m 47s", "remaining_time": "0s"} +{"train_runtime": 769.1035, "train_samples_per_second": 1.162, "train_steps_per_second": 0.148, "total_flos": 359754023305216.0, "train_loss": 0.64946646, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "12m 49s", "remaining_time": "0s"} +{"train_dataset": "1698.815436±897.000106, min=182.000000, max=4081.000000, size=298", "val_dataset": "1637.250000±797.581461, min=755.000000, max=2485.000000, size=4", "model_parameter_info": "PeftModelForCausalLM: 14804.4401M Params (34.4064M Trainable [0.2324%]), 0.0001M Buffers.", "last_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-114", "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/checkpoint-90", "best_metric": 0.30639648, "global_step": 114, "log_history": [{"loss": 1.337890625, "grad_norm": 9.854056570832201, "learning_rate": 1.6666666666666667e-05, "memory(GiB)": 10.26, "train_speed(iter/s)": 0.087961, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -704.0, "logps/rejected": -416.0, "logits/chosen": -2.15625, "logits/rejected": -1.40625, "nll_loss": 0.6484375, "epoch": 0.02631578947368421, "step": 1}, {"loss": 2.0830078125, "grad_norm": 13.591956786084207, "learning_rate": 8.333333333333334e-05, "memory(GiB)": 20.01, 
"train_speed(iter/s)": 0.140484, "rewards/chosen": 0.06103515625, "rewards/rejected": 0.0156402587890625, "rewards/accuracies": 0.3125, "rewards/margins": 0.0452880859375, "logps/chosen": -721.0, "logps/rejected": -575.5, "logits/chosen": -1.595703125, "logits/rejected": -1.576171875, "nll_loss": 1.4130859375, "epoch": 0.13157894736842105, "step": 5}, {"loss": 2.04072265625, "grad_norm": 7.5902824333777845, "learning_rate": 9.966191788709716e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.141561, "rewards/chosen": 1.015625, "rewards/rejected": 0.3578124940395355, "rewards/accuracies": 0.7749999761581421, "rewards/margins": 0.6595703363418579, "logps/chosen": -621.5999755859375, "logps/rejected": -703.2000122070312, "logits/chosen": -1.5265624523162842, "logits/rejected": -1.6375000476837158, "nll_loss": 1.5515625476837158, "epoch": 0.2631578947368421, "step": 10}, {"eval_loss": 0.7734375, "eval_runtime": 2.3433, "eval_samples_per_second": 1.707, "eval_steps_per_second": 0.427, "eval_rewards/chosen": 3.46875, "eval_rewards/rejected": 1.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 2.125, "eval_logps/chosen": -256.0, "eval_logps/rejected": -1096.0, "eval_logits/chosen": -0.9375, "eval_logits/rejected": -1.828125, "eval_nll_loss": 0.66015625, "epoch": 0.2631578947368421, "step": 10}, {"loss": 1.05107421875, "grad_norm": 2.131075980613862, "learning_rate": 9.829629131445342e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.146121, "rewards/chosen": 4.015625, "rewards/rejected": 1.545312523841858, "rewards/accuracies": 0.949999988079071, "rewards/margins": 2.4703125953674316, "logps/chosen": -642.7999877929688, "logps/rejected": -580.0, "logits/chosen": -1.720312476158142, "logits/rejected": -1.673437476158142, "nll_loss": 0.87109375, "epoch": 0.39473684210526316, "step": 15}, {"loss": 0.83876953125, "grad_norm": 1.673054658387307, "learning_rate": 9.591080534401371e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.148181, "rewards/chosen": 
8.206250190734863, "rewards/rejected": 3.7593750953674316, "rewards/accuracies": 1.0, "rewards/margins": 4.456250190734863, "logps/chosen": -402.3999938964844, "logps/rejected": -565.5999755859375, "logits/chosen": -1.649999976158142, "logits/rejected": -1.7312500476837158, "nll_loss": 0.811718761920929, "epoch": 0.5263157894736842, "step": 20}, {"eval_loss": 0.39208984375, "eval_runtime": 2.3282, "eval_samples_per_second": 1.718, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 11.0625, "eval_rewards/rejected": 6.09375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 4.9375, "eval_logps/chosen": -180.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -0.875, "eval_logits/rejected": -1.8203125, "eval_nll_loss": 0.37890625, "epoch": 0.5263157894736842, "step": 20}, {"loss": 0.58045654296875, "grad_norm": 1.3628742287518036, "learning_rate": 9.255583362184999e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.146228, "rewards/chosen": 10.899999618530273, "rewards/rejected": 4.528124809265137, "rewards/accuracies": 1.0, "rewards/margins": 6.381249904632568, "logps/chosen": -561.5999755859375, "logps/rejected": -661.2000122070312, "logits/chosen": -1.6453125476837158, "logits/rejected": -1.6375000476837158, "nll_loss": 0.571093738079071, "epoch": 0.6578947368421053, "step": 25}, {"loss": 0.5160888671875, "grad_norm": 0.7276190764282714, "learning_rate": 8.83022221559489e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.146969, "rewards/chosen": 11.899999618530273, "rewards/rejected": 3.450000047683716, "rewards/accuracies": 1.0, "rewards/margins": 8.46875, "logps/chosen": -405.70001220703125, "logps/rejected": -592.7999877929688, "logits/chosen": -1.5343749523162842, "logits/rejected": -1.7734375, "nll_loss": 0.515625, "epoch": 0.7894736842105263, "step": 30}, {"eval_loss": 0.348876953125, "eval_runtime": 2.3448, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.426, "eval_rewards/chosen": 12.375, "eval_rewards/rejected": 3.84375, 
"eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.5, "eval_logps/chosen": -167.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.0078125, "eval_logits/rejected": -1.8125, "eval_nll_loss": 0.34765625, "epoch": 0.7894736842105263, "step": 30}, {"loss": 0.568194580078125, "grad_norm": 0.8371169486087437, "learning_rate": 8.323979328069689e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.14542, "rewards/chosen": 13.912500381469727, "rewards/rejected": 4.153124809265137, "rewards/accuracies": 1.0, "rewards/margins": 9.756250381469727, "logps/chosen": -619.2000122070312, "logps/rejected": -638.4000244140625, "logits/chosen": -1.728124976158142, "logits/rejected": -1.7218749523162842, "nll_loss": 0.5648437738418579, "epoch": 0.9210526315789473, "step": 35}, {"loss": 0.53173828125, "grad_norm": 0.7334467774160591, "learning_rate": 7.74754489035403e-05, "memory(GiB)": 46.14, "train_speed(iter/s)": 0.146745, "rewards/chosen": 14.212499618530273, "rewards/rejected": 2.2855467796325684, "rewards/accuracies": 1.0, "rewards/margins": 11.912500381469727, "logps/chosen": -471.6000061035156, "logps/rejected": -683.2000122070312, "logits/chosen": -1.662500023841858, "logits/rejected": -1.6984374523162842, "nll_loss": 0.586718738079071, "epoch": 1.0526315789473684, "step": 40}, {"eval_loss": 0.32861328125, "eval_runtime": 2.3307, "eval_samples_per_second": 1.716, "eval_steps_per_second": 0.429, "eval_rewards/chosen": 13.1875, "eval_rewards/rejected": -0.8515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.0625, "eval_logps/chosen": -159.0, "eval_logps/rejected": -1120.0, "eval_logits/chosen": -0.984375, "eval_logits/rejected": -1.734375, "eval_nll_loss": 0.328125, "epoch": 1.0526315789473684, "step": 40}, {"loss": 0.44443359375, "grad_norm": 0.546250666668433, "learning_rate": 7.113091308703498e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.145269, "rewards/chosen": 15.1875, "rewards/rejected": -0.7007812261581421, "rewards/accuracies": 1.0, 
"rewards/margins": 15.899999618530273, "logps/chosen": -442.3999938964844, "logps/rejected": -720.4000244140625, "logits/chosen": -1.5578124523162842, "logits/rejected": -1.59375, "nll_loss": 0.44414061307907104, "epoch": 1.1842105263157894, "step": 45}, {"loss": 0.50015869140625, "grad_norm": 0.44204175223844183, "learning_rate": 6.434016163555452e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.146516, "rewards/chosen": 16.3125, "rewards/rejected": -0.5650390386581421, "rewards/accuracies": 1.0, "rewards/margins": 16.875, "logps/chosen": -491.6000061035156, "logps/rejected": -678.0, "logits/chosen": -1.578125, "logits/rejected": -1.704687476158142, "nll_loss": 0.500781238079071, "epoch": 1.3157894736842106, "step": 50}, {"eval_loss": 0.315185546875, "eval_runtime": 2.3162, "eval_samples_per_second": 1.727, "eval_steps_per_second": 0.432, "eval_rewards/chosen": 13.875, "eval_rewards/rejected": -1.1484375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 15.0625, "eval_logps/chosen": -152.0, "eval_logps/rejected": -1128.0, "eval_logits/chosen": -1.0, "eval_logits/rejected": -1.671875, "eval_nll_loss": 0.314453125, "epoch": 1.3157894736842106, "step": 50}, {"loss": 0.4770263671875, "grad_norm": 0.2875997592353248, "learning_rate": 5.724659296536233e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.147535, "rewards/chosen": 16.575000762939453, "rewards/rejected": 0.4677734375, "rewards/accuracies": 1.0, "rewards/margins": 16.112499237060547, "logps/chosen": -471.6000061035156, "logps/rejected": -622.4000244140625, "logits/chosen": -1.4328124523162842, "logits/rejected": -1.529687523841858, "nll_loss": 0.4765625, "epoch": 1.4473684210526316, "step": 55}, {"loss": 0.4359375, "grad_norm": 0.21479272542751, "learning_rate": 5e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.148886, "rewards/chosen": 16.912500381469727, "rewards/rejected": 0.638476550579071, "rewards/accuracies": 1.0, "rewards/margins": 16.274999618530273, "logps/chosen": 
-517.2000122070312, "logps/rejected": -496.79998779296875, "logits/chosen": -1.579687476158142, "logits/rejected": -1.342187523841858, "nll_loss": 0.4359374940395355, "epoch": 1.5789473684210527, "step": 60}, {"eval_loss": 0.310546875, "eval_runtime": 2.3283, "eval_samples_per_second": 1.718, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 14.0, "eval_rewards/rejected": -0.546875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -151.0, "eval_logps/rejected": -1120.0, "eval_logits/chosen": -0.828125, "eval_logits/rejected": -1.6328125, "eval_nll_loss": 0.310546875, "epoch": 1.5789473684210527, "step": 60}, {"loss": 0.409503173828125, "grad_norm": 0.5495600856192168, "learning_rate": 4.275340703463767e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.148787, "rewards/chosen": 16.987499237060547, "rewards/rejected": 0.22265625, "rewards/accuracies": 1.0, "rewards/margins": 16.762500762939453, "logps/chosen": -452.3999938964844, "logps/rejected": -544.7999877929688, "logits/chosen": -1.5078125, "logits/rejected": -1.4968750476837158, "nll_loss": 0.4097656309604645, "epoch": 1.7105263157894737, "step": 65}, {"loss": 0.4465576171875, "grad_norm": 0.3929300642395231, "learning_rate": 3.5659838364445505e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.149098, "rewards/chosen": 17.087499618530273, "rewards/rejected": 0.674121081829071, "rewards/accuracies": 1.0, "rewards/margins": 16.424999237060547, "logps/chosen": -402.79998779296875, "logps/rejected": -641.2000122070312, "logits/chosen": -1.384374976158142, "logits/rejected": -1.5164062976837158, "nll_loss": 0.44609373807907104, "epoch": 1.8421052631578947, "step": 70}, {"eval_loss": 0.310546875, "eval_runtime": 2.3424, "eval_samples_per_second": 1.708, "eval_steps_per_second": 0.427, "eval_rewards/chosen": 14.0625, "eval_rewards/rejected": 0.69921875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -150.0, "eval_logps/rejected": 
-1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.625, "eval_nll_loss": 0.310546875, "epoch": 1.8421052631578947, "step": 70}, {"loss": 0.4861083984375, "grad_norm": 0.6458764930563424, "learning_rate": 2.886908691296504e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.148719, "rewards/chosen": 17.462499618530273, "rewards/rejected": 0.7554687261581421, "rewards/accuracies": 1.0, "rewards/margins": 16.649999618530273, "logps/chosen": -510.0, "logps/rejected": -680.4000244140625, "logits/chosen": -1.3671875, "logits/rejected": -1.3984375, "nll_loss": 0.486328125, "epoch": 1.973684210526316, "step": 75}, {"loss": 0.46317138671875, "grad_norm": 0.5294379022539796, "learning_rate": 2.25245510964597e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.147985, "rewards/chosen": 16.850000381469727, "rewards/rejected": -0.3223632872104645, "rewards/accuracies": 1.0, "rewards/margins": 17.174999237060547, "logps/chosen": -432.79998779296875, "logps/rejected": -620.7999877929688, "logits/chosen": -1.404687523841858, "logits/rejected": -1.404687523841858, "nll_loss": 0.47539061307907104, "epoch": 2.1052631578947367, "step": 80}, {"eval_loss": 0.30712890625, "eval_runtime": 2.3227, "eval_samples_per_second": 1.722, "eval_steps_per_second": 0.431, "eval_rewards/chosen": 14.125, "eval_rewards/rejected": 1.25, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 12.875, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.6015625, "eval_nll_loss": 0.306640625, "epoch": 2.1052631578947367, "step": 80}, {"loss": 0.4080902099609375, "grad_norm": 0.5098063945805259, "learning_rate": 1.6760206719303105e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.14806, "rewards/chosen": 17.325000762939453, "rewards/rejected": 1.209570288658142, "rewards/accuracies": 1.0, "rewards/margins": 16.137500762939453, "logps/chosen": -390.79998779296875, "logps/rejected": -747.2000122070312, "logits/chosen": 
-1.2625000476837158, "logits/rejected": -1.5234375, "nll_loss": 0.4078125059604645, "epoch": 2.236842105263158, "step": 85}, {"loss": 0.42548675537109376, "grad_norm": 0.2629503293334859, "learning_rate": 1.1697777844051105e-05, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.149448, "rewards/chosen": 18.5, "rewards/rejected": 1.658789038658142, "rewards/accuracies": 1.0, "rewards/margins": 16.799999237060547, "logps/chosen": -483.20001220703125, "logps/rejected": -589.5999755859375, "logits/chosen": -1.443750023841858, "logits/rejected": -1.2296874523162842, "nll_loss": 0.42500001192092896, "epoch": 2.3684210526315788, "step": 90}, {"eval_loss": 0.306396484375, "eval_runtime": 2.3883, "eval_samples_per_second": 1.675, "eval_steps_per_second": 0.419, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 1.1953125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.0, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.70703125, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.306640625, "epoch": 2.3684210526315788, "step": 90}, {"loss": 0.470208740234375, "grad_norm": 0.2528000224499017, "learning_rate": 7.444166378150013e-06, "memory(GiB)": 47.59, "train_speed(iter/s)": 0.148517, "rewards/chosen": 19.25, "rewards/rejected": 1.026757836341858, "rewards/accuracies": 1.0, "rewards/margins": 18.237499237060547, "logps/chosen": -526.7999877929688, "logps/rejected": -720.0, "logits/chosen": -1.3624999523162842, "logits/rejected": -1.4609375, "nll_loss": 0.46953123807907104, "epoch": 2.5, "step": 95}, {"loss": 0.4023651123046875, "grad_norm": 0.4249614464684851, "learning_rate": 4.089194655986306e-06, "memory(GiB)": 56.84, "train_speed(iter/s)": 0.14936, "rewards/chosen": 16.762500762939453, "rewards/rejected": -0.31132811307907104, "rewards/accuracies": 1.0, "rewards/margins": 17.0625, "logps/chosen": -406.20001220703125, "logps/rejected": -563.2000122070312, "logits/chosen": -1.396875023841858, "logits/rejected": 
-1.5390625, "nll_loss": 0.40253907442092896, "epoch": 2.6315789473684212, "step": 100}, {"eval_loss": 0.306396484375, "eval_runtime": 2.3404, "eval_samples_per_second": 1.709, "eval_steps_per_second": 0.427, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.25, "eval_logps/chosen": -148.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.306640625, "epoch": 2.6315789473684212, "step": 100}, {"loss": 0.41885986328125, "grad_norm": 0.5639157494773256, "learning_rate": 1.70370868554659e-06, "memory(GiB)": 56.84, "train_speed(iter/s)": 0.149009, "rewards/chosen": 17.112499237060547, "rewards/rejected": 0.7904297113418579, "rewards/accuracies": 1.0, "rewards/margins": 16.299999237060547, "logps/chosen": -432.54998779296875, "logps/rejected": -568.7999877929688, "logits/chosen": -1.41796875, "logits/rejected": -1.4953124523162842, "nll_loss": 0.41874998807907104, "epoch": 2.763157894736842, "step": 105}, {"loss": 0.39718017578125, "grad_norm": 0.4926501392943744, "learning_rate": 3.380821129028489e-07, "memory(GiB)": 56.84, "train_speed(iter/s)": 0.149381, "rewards/chosen": 17.862499237060547, "rewards/rejected": 0.46240234375, "rewards/accuracies": 1.0, "rewards/margins": 17.375, "logps/chosen": -453.6000061035156, "logps/rejected": -603.5999755859375, "logits/chosen": -1.357812523841858, "logits/rejected": -1.529687523841858, "nll_loss": 0.3970703184604645, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.3076171875, "eval_runtime": 2.3273, "eval_samples_per_second": 1.719, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 1.046875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.125, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6953125, "eval_logits/rejected": -1.5859375, "eval_nll_loss": 0.30859375, "epoch": 2.8947368421052633, 
"step": 110}, {"eval_loss": 0.3076171875, "eval_runtime": 2.2876, "eval_samples_per_second": 1.749, "eval_steps_per_second": 0.437, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.1875, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.703125, "eval_logits/rejected": -1.5859375, "eval_nll_loss": 0.30859375, "epoch": 3.0, "step": 114}, {"train_runtime": 769.1035, "train_samples_per_second": 1.162, "train_steps_per_second": 0.148, "total_flos": 359754023305216.0, "train_loss": 0.6494664643940172, "epoch": 3.0, "step": 114}], "memory": 56.84375} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs/events.out.tfevents.1739623116.kml-task-540432-record-10144729-prod-worker-0.9818.0 b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs/events.out.tfevents.1739623116.kml-task-540432-record-10144729-prod-worker-0.9818.0 new file mode 100644 index 0000000000000000000000000000000000000000..d897ae62f80570b233f093b53a92f9ee3cb74a44 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-123709/runs/events.out.tfevents.1739623116.kml-task-540432-record-10144729-prod-worker-0.9818.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7c17bb687d816b52250f34d9acc81c3ba1a2f37cabb1a1233eb3c1fd7a9485 +size 36881 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/args.json new file mode 100644 index 0000000000000000000000000000000000000000..1115c6d9c5a07f0a258efadd0ed2b11cdd05eeda --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/args.json @@ -0,0 +1,374 @@ +{ + "model": 
"/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119", + "overwrite_output_dir": false, + "do_train": false, + 
"do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, 
+ "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + 
"push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + 
"galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', 
torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, 
eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, 
even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, 
dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + 
+- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. 
(2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c56e118080ac8fcfab07c714ee2f0d7ff595a8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": 
null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "gate_proj", + "q_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e070b18f6b5126b05eedbb51bd53f6e26eec0b9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c448e766ee8026c7ecf0dad3e496837eb083b591a6c1be72d5d2db3309b77002 +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..1115c6d9c5a07f0a258efadd0ed2b11cdd05eeda --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..885ea98a53eb00a89527d76dd4b8838655b23a5e --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0bc5ac6922afd09eb1ef9513335b4e533c261670a5d6af3824b0ec604f8e329 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..101733a1a265dfa876b3e7e3d79d5b7dce26e63a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17443c275acea17453e8fcda6a48ebfa616452eb373df1b7803291e5f1384237 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..abb92385580951286647aef29760d68baa82bea6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddfb61334919baa7a025802e8fb9ee9692d3f52030e26d621bdc8078fde10443 +size 51613616 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fddf18c9b5314e9cc6cdf1f0815bb39e5cec02e5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e50b02f8e2354d162ac9540f8449dba04fca6c1a1b1799e9cb92959b05d3e95 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9769b026c63dea6f90c57f90521ebb99c568560b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12115d4d9fdca408d0203fec3d74ae619e77518caa688e907128ee4a72124c7b +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ab87af8cfc3173806a9b5a18ec17964cefdf7fa9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1e742d5c13c2a769e902a958b1886429f23f233082626a922f73162a5887ba1 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8b80fca36b2073b80c7db565e763c5bff163183 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6bf79dba4d3d3f1e5484e996123e9032c134aaed2ac31e8ecd28591b9de00bf +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9beeb57da57e037942e519a7cc6e34cb1646700 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:06626d5d374d1356fd54df5c7dc5c738bf5e2d93f8b65c93137fe4a677d05318 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca9e149d406f3ba060df00662843e208114186d6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:135a7241ef86deb44c06cb46106b144c6e0e0c6012a18d83a3b9900ee9887115 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..70a1db716db77b20ce69e020211bd624d917fdc9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192dbf9a4ce2f4bf6aa21f7b28309443ecc22114d4c26ab7c31833ee4fd8ed6f +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..633a89f6bcf368edb6854d00497f4df874d8754c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cf6cb84dcdbdfc470e75fcec2424f47825bfc445fc2f5c189a110fd89794a1 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f3a20ba8148315a06d9b6627ed5ee3d3db5255c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241cf79cc46b8720c3383c4d5e07258f71513067605f3af4e98ada709037fe6c +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb97335edaaf12e7d04b57d168811b35742d6e80 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68ea5493995f4bd480618ef95080063c299cb1bf9889e54e67f368e50295cf0 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..382f020457c5f2831932377914bded675ff5039e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97553c6ea6760e161bf7482142842224a0033997c14b61e006ff2f7ae10c024a +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..161855440189181ba47e10abb51733b1fc4e3093 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8946d6a0f45a11de886c6a8d31a7b51eabf81902df7507f1c93812c6b4b6f01 +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..26de83c3f3bce77c08aa0fdc646a85355a613bb0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e371c9fd2aa88159e817c8c5175b22abb35a52efc1c96e22a5a080c415017f82 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/latest new file mode 100644 index 0000000000000000000000000000000000000000..744ae7dbad571b6f37ec6c7066549494261bb59e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/latest @@ -0,0 +1 @@ +global_step100 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..97f51b498d48145bd9cc14b35f8236b9ec95a4f7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4 +size 
15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08e59ac81067b262a084604cd3392250166c2841 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..20a24c17b4be2ee59cd5e6682010519318a91e58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..54050f6cf8fb847e2a926e14a7aad2647761521a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..263aae475c49b090bce43f143308192c5bf9a95b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..942ed5d60ae87dce686b33da76a34db404036dc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..57789be3df3983cb8acc1500bf6470ffadb1c578 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32d6e2e7eb7148713b473b0c821a98e616ab6e6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a1fb08c48e9d34df783eb19e7c9d1caf0ed386 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec37c3a15b8d061312402391f2fddb52d623a1416d6d2879a30f184450d844f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e57855710081475828f7aad5072578252dc469e2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/trainer_state.json @@ -0,0 +1,581 @@ 
+{ + "best_metric": 0.30639648, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90", + "epoch": 2.6315789473684212, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854079581094561, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 9.88, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.089235 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.792355942314634, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.100830078125, + "memory(GiB)": 19.62, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.03759765625, + "rewards/margins": 0.01247406005859375, + "rewards/rejected": 0.0250244140625, + "step": 5, + "train_speed(iter/s)": 0.141028 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 6.728344330769066, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.524999976158142, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -620.7999877929688, + "logps/rejected": -703.2000122070312, + "loss": 2.03017578125, + "memory(GiB)": 46.15, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.0109374523162842, + "rewards/margins": 0.6689453125, + "rewards/rejected": 0.34417724609375, + "step": 10, + "train_speed(iter/s)": 0.141858 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + 
"eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7890625, + "eval_nll_loss": 0.65625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.4375, + "eval_rewards/margins": 2.09375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.2882, + "eval_samples_per_second": 1.748, + "eval_steps_per_second": 0.437, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.3440116641074753, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.670312523841858, + "logps/chosen": -641.2000122070312, + "logps/rejected": -579.5999755859375, + "loss": 1.047705078125, + "memory(GiB)": 46.15, + "nll_loss": 0.864062488079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.159375190734863, + "rewards/margins": 2.5609374046325684, + "rewards/rejected": 1.6046874523162842, + "step": 15, + "train_speed(iter/s)": 0.146389 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.6177693098167876, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6515624523162842, + "logits/rejected": -1.734375, + "logps/chosen": -400.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.8362548828125, + "memory(GiB)": 46.15, + "nll_loss": 0.8031250238418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.362500190734863, + "rewards/margins": 4.474999904632568, + "rewards/rejected": 3.8843750953674316, + "step": 20, + "train_speed(iter/s)": 0.148317 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.8828125, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -179.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.388671875, + "eval_nll_loss": 0.376953125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 5.125, + "eval_rewards/rejected": 6.0, + "eval_runtime": 2.32, + "eval_samples_per_second": 1.724, + "eval_steps_per_second": 0.431, + "step": 
20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.2977003336577986, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.646875023841858, + "logps/chosen": -560.0, + "logps/rejected": -661.5999755859375, + "loss": 0.57784423828125, + "memory(GiB)": 46.15, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.087499618530273, + "rewards/margins": 6.550000190734863, + "rewards/rejected": 4.537499904632568, + "step": 25, + "train_speed(iter/s)": 0.146297 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7551717618560747, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.545312523841858, + "logits/rejected": -1.78125, + "logps/chosen": -405.20001220703125, + "logps/rejected": -594.0, + "loss": 0.5139404296875, + "memory(GiB)": 46.15, + "nll_loss": 0.513671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.925000190734863, + "rewards/margins": 8.675000190734863, + "rewards/rejected": 3.2593750953674316, + "step": 30, + "train_speed(iter/s)": 0.147047 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.015625, + "eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -168.0, + "eval_logps/rejected": -1088.0, + "eval_loss": 0.347900390625, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.3125, + "eval_rewards/margins": 9.5625, + "eval_rewards/rejected": 2.75, + "eval_runtime": 2.3228, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.8190181064971851, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7468750476837158, + "logits/rejected": -1.735937476158142, + "logps/chosen": -620.4000244140625, + "logps/rejected": -643.5999755859375, + "loss": 0.56610107421875, + "memory(GiB)": 46.15, + "nll_loss": 0.565625011920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.862500190734863, + 
"rewards/margins": 10.087499618530273, + "rewards/rejected": 3.7906250953674316, + "step": 35, + "train_speed(iter/s)": 0.145437 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7484729451276256, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7156250476837158, + "logps/chosen": -469.6000061035156, + "logps/rejected": -676.0, + "loss": 0.530419921875, + "memory(GiB)": 46.15, + "nll_loss": 0.582812488079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.425000190734863, + "rewards/margins": 11.5625, + "rewards/rejected": 2.8515625, + "step": 40, + "train_speed(iter/s)": 0.146795 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.75, + "eval_logps/chosen": -157.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.325927734375, + "eval_nll_loss": 0.326171875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.375, + "eval_rewards/margins": 13.0625, + "eval_rewards/rejected": 0.30078125, + "eval_runtime": 2.3449, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.513917830071538, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.610937476158142, + "logps/chosen": -441.6000061035156, + "logps/rejected": -712.4000244140625, + "loss": 0.444189453125, + "memory(GiB)": 47.61, + "nll_loss": 0.4437499940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.300000190734863, + "rewards/margins": 15.300000190734863, + "rewards/rejected": 0.00937500037252903, + "step": 45, + "train_speed(iter/s)": 0.145239 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4704081803690242, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.5890624523162842, + "logits/rejected": -1.7140624523162842, + "logps/chosen": -492.3999938964844, + "logps/rejected": -671.2000122070312, + "loss": 
0.500921630859375, + "memory(GiB)": 47.61, + "nll_loss": 0.501171886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 16.200000762939453, + "rewards/rejected": 0.04252929612994194, + "step": 50, + "train_speed(iter/s)": 0.146472 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.65625, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.313720703125, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": -0.69921875, + "eval_runtime": 2.283, + "eval_samples_per_second": 1.752, + "eval_steps_per_second": 0.438, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.31799240114671307, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.5265624523162842, + "logps/chosen": -471.20001220703125, + "logps/rejected": -620.7999877929688, + "loss": 0.4777099609375, + "memory(GiB)": 47.61, + "nll_loss": 0.478515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.662500381469727, + "rewards/margins": 15.987500190734863, + "rewards/rejected": 0.690625011920929, + "step": 55, + "train_speed(iter/s)": 0.147538 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.20661664017833653, + "learning_rate": 5e-05, + "logits/chosen": -1.587499976158142, + "logits/rejected": -1.357812523841858, + "logps/chosen": -514.7999877929688, + "logps/rejected": -496.0, + "loss": 0.43349609375, + "memory(GiB)": 47.61, + "nll_loss": 0.43359375, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.049999237060547, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.6507812738418579, + "step": 60, + "train_speed(iter/s)": 0.148871 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.78125, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -152.0, + 
"eval_logps/rejected": -1120.0, + "eval_loss": 0.31396484375, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 14.875, + "eval_rewards/rejected": -1.0, + "eval_runtime": 2.3335, + "eval_samples_per_second": 1.714, + "eval_steps_per_second": 0.429, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5101990627724774, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.490625023841858, + "logits/rejected": -1.5187499523162842, + "logps/chosen": -452.3999938964844, + "logps/rejected": -550.7999877929688, + "loss": 0.4079315185546875, + "memory(GiB)": 47.61, + "nll_loss": 0.408203125, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 17.399999618530273, + "rewards/rejected": -0.40937501192092896, + "step": 65, + "train_speed(iter/s)": 0.148763 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.40382497884369994, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.375, + "logits/rejected": -1.532812476158142, + "logps/chosen": -402.20001220703125, + "logps/rejected": -646.4000244140625, + "loss": 0.44562835693359376, + "memory(GiB)": 47.61, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.137500762939453, + "rewards/margins": 16.899999618530273, + "rewards/rejected": 0.24648436903953552, + "step": 70, + "train_speed(iter/s)": 0.14889 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.71875, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.312744140625, + "eval_nll_loss": 0.3125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 13.875, + "eval_rewards/rejected": 0.099609375, + "eval_runtime": 2.2858, + "eval_samples_per_second": 1.75, + "eval_steps_per_second": 0.437, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 
0.6457723772811964, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.376562476158142, + "logits/rejected": -1.412500023841858, + "logps/chosen": -511.20001220703125, + "logps/rejected": -682.0, + "loss": 0.4864990234375, + "memory(GiB)": 47.61, + "nll_loss": 0.48710936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.375, + "rewards/margins": 16.825000762939453, + "rewards/rejected": 0.583203136920929, + "step": 75, + "train_speed(iter/s)": 0.148551 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5530412658330373, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.4093749523162842, + "logits/rejected": -1.4171874523162842, + "logps/chosen": -432.3999938964844, + "logps/rejected": -624.0, + "loss": 0.463671875, + "memory(GiB)": 47.61, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.875, + "rewards/margins": 17.512500762939453, + "rewards/rejected": -0.6333984136581421, + "step": 80, + "train_speed(iter/s)": 0.147818 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.69921875, + "eval_logits/rejected": -1.609375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.307373046875, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3551, + "eval_samples_per_second": 1.698, + "eval_steps_per_second": 0.425, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.4844426897846391, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.256250023841858, + "logits/rejected": -1.537500023841858, + "logps/chosen": -392.3999938964844, + "logps/rejected": -744.7999877929688, + "loss": 0.40716094970703126, + "memory(GiB)": 47.61, + "nll_loss": 0.40703123807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.162500381469727, + "rewards/margins": 15.800000190734863, + 
"rewards/rejected": 1.385156273841858, + "step": 85, + "train_speed(iter/s)": 0.147889 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.31612081336180875, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.4296875, + "logits/rejected": -1.2374999523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -590.0, + "loss": 0.4235595703125, + "memory(GiB)": 47.61, + "nll_loss": 0.423828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.549999237060547, + "rewards/margins": 16.887500762939453, + "rewards/rejected": 1.6085937023162842, + "step": 90, + "train_speed(iter/s)": 0.149275 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -0.6953125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3, + "eval_samples_per_second": 1.739, + "eval_steps_per_second": 0.435, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.24964156861715608, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.360937476158142, + "logits/rejected": -1.467187523841858, + "logps/chosen": -524.4000244140625, + "logps/rejected": -720.0, + "loss": 0.46888427734375, + "memory(GiB)": 47.61, + "nll_loss": 0.46875, + "rewards/accuracies": 1.0, + "rewards/chosen": 19.375, + "rewards/margins": 18.424999237060547, + "rewards/rejected": 0.9701172113418579, + "step": 95, + "train_speed(iter/s)": 0.148344 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.42005408649300036, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.392187476158142, + "logits/rejected": -1.546875, + "logps/chosen": -405.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.40069580078125, + "memory(GiB)": 56.86, + "nll_loss": 0.4007812440395355, + "rewards/accuracies": 
1.0, + "rewards/chosen": 16.799999237060547, + "rewards/margins": 17.225000381469727, + "rewards/rejected": -0.4281249940395355, + "step": 100, + "train_speed(iter/s)": 0.149184 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -0.69140625, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.2948, + "eval_samples_per_second": 1.743, + "eval_steps_per_second": 0.436, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 316219562655744.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..910de15042b38b21d1ffd30fb26ec6176527a29d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca6d16ca5c664d1b07e14590d3d0bcd5bea63a33cc8b21067ece209aa1a26b0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-100/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient.
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint.
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # a memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir,
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder.
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c56e118080ac8fcfab07c714ee2f0d7ff595a8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "gate_proj", + "q_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c84b5f0223c55fdd18bcefe719b8386119e6e16 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a3ba7e96b8d1e5b7862c82d57fc830719a2c7c2390de509d3f146930ccd1e4 +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..1115c6d9c5a07f0a258efadd0ed2b11cdd05eeda --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..048f7db4d11d7a0fc7e96a729b97e6a70aad7585 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71fefba31a9ab4b0b2117b7569a6959e1fcd3aefc837f6e39fb87948e35d65b +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e7ac6dad603d31188943f2abb205fd8e8576df5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60f42244932bca9ecbda604927a2d5903481b88289f6b6ebe0a94233da310b57 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3237ef99aaae91de661d4c0074fc25c6114f827d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15224631bc59820477b1dfca2d192a61b9a04a256840d7fafb0bfd5867f26a8f +size 51613616 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1587b9b1c91ab59f29647287a013d42e3ac08c16 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843907b29bb7399ea5657b65787cc46c56ac58744f24f09f0a5244374ed44bae +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..459de67a7d31f91c4fe1bac484b8a851a8f1505b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adefb60a04c42f437d877f0ef4a3111ff9586f6826bdf265f14c0ffbad9b539e +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d97b8029dce392458776090f41831b6d64ca0460 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:437a9677870b22a2f9f7cd83724348987ee06a19642b8fb92e22b9cdacb7922a +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a7e5778483f9ae996422513c44635c7f2141e2b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40cbea3cdafd7ed4aa1759a8704901c085e1a320febe941e09f986c8c490889e +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6bd1e1db5fb4a13234e52ea0eecb6cfa1c5cebd --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6aeea012c4165a630dd9ca218084e5c0f64dc58ae3535c5d4e35d3a0cfe6914d +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9b6dfaee1368c16d95dda01000019032e8f73eb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cfb7fb8bbdac3c53af8aacd47d8558291e75bfd01d7b4edbe195c9b7cf3264e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22eb91a1a8757debe9f8787fbde77224d9b17e96 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244656b5370a022133d56ecf244f500ddb96315b5edeacf2c2be253b20caad1e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89a731a2b11066ecf64bf9c72ac900cb8fe83c71 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff9e36d9838e901f5dd8cff2f4a6cec5aac98985915b36a5da4d6ed344e3fe6 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f51dfbcf6981eb37c0a945576f20bfb0f4d48e68 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a216e82bfd7548b89fa07cd74b4a0f7844f56996bb401be1c7104f1acce66d5 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..da6fdcf31e89d7dae19ca2b9e5a22bd5a2b1797f --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43196b68f0349fc7030997a40ded0d6ff8fa2b0eb3217108654fc3b97836a3cf +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fc78d32361c82899f602e80a67778d52299f38b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f727b2aa45fccd37f5e22c05d8c76daf0653f24c8d7f2c82f9b77344846d202 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43bd2ed2136f04bbd33135ddbcba8eb278e25d8d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0704c3c886966fa12eb8c4e178097af9d0278e93024f996e1342c72be1979b7f +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5ad37553a29d9b1ee0b918421b9b2d6d3ece89b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2714c41054a80bc60e001a072d6081cb1dd55a0b44099c185f6e941d8701980 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/latest new file mode 100644 index 0000000000000000000000000000000000000000..a9a22a69382a7711ca9e8ab6945c6d2cc8984927 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/latest @@ -0,0 +1 @@ +global_step110 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..584f4a4a43f100f35696d7314a633631af587f25 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee +size 
15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..05b027a867e5e9cebd446293ecff82cfb240cc76 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..af98f0dfe2a5d89fbccf90df58246a0b078c7016 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..715aa4a4ee3915f810fc2bacb2153eb8a0913781 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7bde70899833455b6ee4a99aff9388abc5ffe92 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..90cdeaa2fe438098e9d95ddbc06c765e51af1e78 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bd30529614c5be239cd9477af6bef0e313740b6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_6.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bed311094effd49cc2c89237c675f56eade157d1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38b1a376e7c81e3c533cf8a69ddf4eefa9d1336c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0767a9fe84680a5a8a76633a443cb301092115c026c1f5f7f1fbdc53dd7f856f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c8bd621261183bee7ac8af0c883257c035ab6b87 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/trainer_state.json @@ -0,0 +1,634 @@ 
+{ + "best_metric": 0.30639648, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90", + "epoch": 2.8947368421052633, + "eval_steps": 10, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854079581094561, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 9.88, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.089235 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.792355942314634, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.100830078125, + "memory(GiB)": 19.62, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.03759765625, + "rewards/margins": 0.01247406005859375, + "rewards/rejected": 0.0250244140625, + "step": 5, + "train_speed(iter/s)": 0.141028 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 6.728344330769066, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.524999976158142, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -620.7999877929688, + "logps/rejected": -703.2000122070312, + "loss": 2.03017578125, + "memory(GiB)": 46.15, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.0109374523162842, + "rewards/margins": 0.6689453125, + "rewards/rejected": 0.34417724609375, + "step": 10, + "train_speed(iter/s)": 0.141858 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + 
"eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7890625, + "eval_nll_loss": 0.65625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.4375, + "eval_rewards/margins": 2.09375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.2882, + "eval_samples_per_second": 1.748, + "eval_steps_per_second": 0.437, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.3440116641074753, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.670312523841858, + "logps/chosen": -641.2000122070312, + "logps/rejected": -579.5999755859375, + "loss": 1.047705078125, + "memory(GiB)": 46.15, + "nll_loss": 0.864062488079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.159375190734863, + "rewards/margins": 2.5609374046325684, + "rewards/rejected": 1.6046874523162842, + "step": 15, + "train_speed(iter/s)": 0.146389 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.6177693098167876, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6515624523162842, + "logits/rejected": -1.734375, + "logps/chosen": -400.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.8362548828125, + "memory(GiB)": 46.15, + "nll_loss": 0.8031250238418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.362500190734863, + "rewards/margins": 4.474999904632568, + "rewards/rejected": 3.8843750953674316, + "step": 20, + "train_speed(iter/s)": 0.148317 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.8828125, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -179.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.388671875, + "eval_nll_loss": 0.376953125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 5.125, + "eval_rewards/rejected": 6.0, + "eval_runtime": 2.32, + "eval_samples_per_second": 1.724, + "eval_steps_per_second": 0.431, + "step": 
20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.2977003336577986, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.646875023841858, + "logps/chosen": -560.0, + "logps/rejected": -661.5999755859375, + "loss": 0.57784423828125, + "memory(GiB)": 46.15, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.087499618530273, + "rewards/margins": 6.550000190734863, + "rewards/rejected": 4.537499904632568, + "step": 25, + "train_speed(iter/s)": 0.146297 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7551717618560747, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.545312523841858, + "logits/rejected": -1.78125, + "logps/chosen": -405.20001220703125, + "logps/rejected": -594.0, + "loss": 0.5139404296875, + "memory(GiB)": 46.15, + "nll_loss": 0.513671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.925000190734863, + "rewards/margins": 8.675000190734863, + "rewards/rejected": 3.2593750953674316, + "step": 30, + "train_speed(iter/s)": 0.147047 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.015625, + "eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -168.0, + "eval_logps/rejected": -1088.0, + "eval_loss": 0.347900390625, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.3125, + "eval_rewards/margins": 9.5625, + "eval_rewards/rejected": 2.75, + "eval_runtime": 2.3228, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.8190181064971851, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7468750476837158, + "logits/rejected": -1.735937476158142, + "logps/chosen": -620.4000244140625, + "logps/rejected": -643.5999755859375, + "loss": 0.56610107421875, + "memory(GiB)": 46.15, + "nll_loss": 0.565625011920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.862500190734863, + 
"rewards/margins": 10.087499618530273, + "rewards/rejected": 3.7906250953674316, + "step": 35, + "train_speed(iter/s)": 0.145437 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7484729451276256, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7156250476837158, + "logps/chosen": -469.6000061035156, + "logps/rejected": -676.0, + "loss": 0.530419921875, + "memory(GiB)": 46.15, + "nll_loss": 0.582812488079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.425000190734863, + "rewards/margins": 11.5625, + "rewards/rejected": 2.8515625, + "step": 40, + "train_speed(iter/s)": 0.146795 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.75, + "eval_logps/chosen": -157.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.325927734375, + "eval_nll_loss": 0.326171875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.375, + "eval_rewards/margins": 13.0625, + "eval_rewards/rejected": 0.30078125, + "eval_runtime": 2.3449, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.513917830071538, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.610937476158142, + "logps/chosen": -441.6000061035156, + "logps/rejected": -712.4000244140625, + "loss": 0.444189453125, + "memory(GiB)": 47.61, + "nll_loss": 0.4437499940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.300000190734863, + "rewards/margins": 15.300000190734863, + "rewards/rejected": 0.00937500037252903, + "step": 45, + "train_speed(iter/s)": 0.145239 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4704081803690242, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.5890624523162842, + "logits/rejected": -1.7140624523162842, + "logps/chosen": -492.3999938964844, + "logps/rejected": -671.2000122070312, + "loss": 
0.500921630859375, + "memory(GiB)": 47.61, + "nll_loss": 0.501171886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 16.200000762939453, + "rewards/rejected": 0.04252929612994194, + "step": 50, + "train_speed(iter/s)": 0.146472 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.65625, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.313720703125, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": -0.69921875, + "eval_runtime": 2.283, + "eval_samples_per_second": 1.752, + "eval_steps_per_second": 0.438, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.31799240114671307, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.5265624523162842, + "logps/chosen": -471.20001220703125, + "logps/rejected": -620.7999877929688, + "loss": 0.4777099609375, + "memory(GiB)": 47.61, + "nll_loss": 0.478515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.662500381469727, + "rewards/margins": 15.987500190734863, + "rewards/rejected": 0.690625011920929, + "step": 55, + "train_speed(iter/s)": 0.147538 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.20661664017833653, + "learning_rate": 5e-05, + "logits/chosen": -1.587499976158142, + "logits/rejected": -1.357812523841858, + "logps/chosen": -514.7999877929688, + "logps/rejected": -496.0, + "loss": 0.43349609375, + "memory(GiB)": 47.61, + "nll_loss": 0.43359375, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.049999237060547, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.6507812738418579, + "step": 60, + "train_speed(iter/s)": 0.148871 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.78125, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -152.0, + 
"eval_logps/rejected": -1120.0, + "eval_loss": 0.31396484375, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 14.875, + "eval_rewards/rejected": -1.0, + "eval_runtime": 2.3335, + "eval_samples_per_second": 1.714, + "eval_steps_per_second": 0.429, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5101990627724774, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.490625023841858, + "logits/rejected": -1.5187499523162842, + "logps/chosen": -452.3999938964844, + "logps/rejected": -550.7999877929688, + "loss": 0.4079315185546875, + "memory(GiB)": 47.61, + "nll_loss": 0.408203125, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 17.399999618530273, + "rewards/rejected": -0.40937501192092896, + "step": 65, + "train_speed(iter/s)": 0.148763 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.40382497884369994, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.375, + "logits/rejected": -1.532812476158142, + "logps/chosen": -402.20001220703125, + "logps/rejected": -646.4000244140625, + "loss": 0.44562835693359376, + "memory(GiB)": 47.61, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.137500762939453, + "rewards/margins": 16.899999618530273, + "rewards/rejected": 0.24648436903953552, + "step": 70, + "train_speed(iter/s)": 0.14889 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.71875, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.312744140625, + "eval_nll_loss": 0.3125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 13.875, + "eval_rewards/rejected": 0.099609375, + "eval_runtime": 2.2858, + "eval_samples_per_second": 1.75, + "eval_steps_per_second": 0.437, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 
0.6457723772811964, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.376562476158142, + "logits/rejected": -1.412500023841858, + "logps/chosen": -511.20001220703125, + "logps/rejected": -682.0, + "loss": 0.4864990234375, + "memory(GiB)": 47.61, + "nll_loss": 0.48710936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.375, + "rewards/margins": 16.825000762939453, + "rewards/rejected": 0.583203136920929, + "step": 75, + "train_speed(iter/s)": 0.148551 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5530412658330373, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.4093749523162842, + "logits/rejected": -1.4171874523162842, + "logps/chosen": -432.3999938964844, + "logps/rejected": -624.0, + "loss": 0.463671875, + "memory(GiB)": 47.61, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.875, + "rewards/margins": 17.512500762939453, + "rewards/rejected": -0.6333984136581421, + "step": 80, + "train_speed(iter/s)": 0.147818 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.69921875, + "eval_logits/rejected": -1.609375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.307373046875, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3551, + "eval_samples_per_second": 1.698, + "eval_steps_per_second": 0.425, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.4844426897846391, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.256250023841858, + "logits/rejected": -1.537500023841858, + "logps/chosen": -392.3999938964844, + "logps/rejected": -744.7999877929688, + "loss": 0.40716094970703126, + "memory(GiB)": 47.61, + "nll_loss": 0.40703123807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.162500381469727, + "rewards/margins": 15.800000190734863, + 
"rewards/rejected": 1.385156273841858, + "step": 85, + "train_speed(iter/s)": 0.147889 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.31612081336180875, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.4296875, + "logits/rejected": -1.2374999523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -590.0, + "loss": 0.4235595703125, + "memory(GiB)": 47.61, + "nll_loss": 0.423828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.549999237060547, + "rewards/margins": 16.887500762939453, + "rewards/rejected": 1.6085937023162842, + "step": 90, + "train_speed(iter/s)": 0.149275 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -0.6953125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3, + "eval_samples_per_second": 1.739, + "eval_steps_per_second": 0.435, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.24964156861715608, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.360937476158142, + "logits/rejected": -1.467187523841858, + "logps/chosen": -524.4000244140625, + "logps/rejected": -720.0, + "loss": 0.46888427734375, + "memory(GiB)": 47.61, + "nll_loss": 0.46875, + "rewards/accuracies": 1.0, + "rewards/chosen": 19.375, + "rewards/margins": 18.424999237060547, + "rewards/rejected": 0.9701172113418579, + "step": 95, + "train_speed(iter/s)": 0.148344 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.42005408649300036, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.392187476158142, + "logits/rejected": -1.546875, + "logps/chosen": -405.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.40069580078125, + "memory(GiB)": 56.86, + "nll_loss": 0.4007812440395355, + "rewards/accuracies": 
1.0, + "rewards/chosen": 16.799999237060547, + "rewards/margins": 17.225000381469727, + "rewards/rejected": -0.4281249940395355, + "step": 100, + "train_speed(iter/s)": 0.149184 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -0.69140625, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.2948, + "eval_samples_per_second": 1.743, + "eval_steps_per_second": 0.436, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.5701405477605673, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.412500023841858, + "logits/rejected": -1.515625, + "logps/chosen": -432.6000061035156, + "logps/rejected": -567.2000122070312, + "loss": 0.4181304931640625, + "memory(GiB)": 56.86, + "nll_loss": 0.4183593690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.212499618530273, + "rewards/margins": 16.3125, + "rewards/rejected": 0.895703136920929, + "step": 105, + "train_speed(iter/s)": 0.148863 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.5114480987922484, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.3484375476837158, + "logits/rejected": -1.5343749523162842, + "logps/chosen": -452.3999938964844, + "logps/rejected": -605.5999755859375, + "loss": 0.3962158203125, + "memory(GiB)": 56.86, + "nll_loss": 0.396484375, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.850000381469727, + "rewards/margins": 17.637500762939453, + "rewards/rejected": 0.22343750298023224, + "step": 110, + "train_speed(iter/s)": 0.149183 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -0.6875, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.3076171875, + "eval_nll_loss": 0.30859375, 
+ "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.323, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.43, + "step": 110 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 347389503471616.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..910de15042b38b21d1ffd30fb26ec6176527a29d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca6d16ca5c664d1b07e14590d3d0bcd5bea63a33cc8b21067ece209aa1a26b0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-110/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = 
os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = 
state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c56e118080ac8fcfab07c714ee2f0d7ff595a8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "gate_proj", + "q_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67c0bf01dc5edc7d46fc0332b9fdc44ded01f5ad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:088983943ce10ef6b2fb7c54918aacbd7e8ff9ffcce272c0a3e07d9346744bca +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..1115c6d9c5a07f0a258efadd0ed2b11cdd05eeda --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..29f5ae19def968caec4835c140dc64f18763921a --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306fd03ae717cf8f98f28f3feb63819113c9e0e516fc1fb37a023bc8843cb21b +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..27625435e4aeefcde047c1a05af3db0da624b65e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302e9d0a9db747d93a43c8f240467a785e43b86305e0a7830a96ffdee81720d9 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a64f15fcc54b541e4dbc964cd3459628b716a24 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45893da260248ab3d12001c0258c038e02186479c73e72ef7f96ca06ef993aa +size 51613616 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de01f9b28fd269c1b3fb1b72a723a182b752cc81 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d766b9f7123bb029f5909ac192378f299c463c1e0348624b5ca3f494135dedeb +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0b4d987b8bbb4706e8d5046f31c81c6c39fd1cc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e03f5bcd34697db9f652211dd15f6616c67ab86e691ce93814cc671bcd298e66 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..34c958c8c5d1060c5485fc95ac387f3246869ce2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75025b395aedbc2baa3afb4e7391ede1d8f2278d4fc65a369f23719df764694c +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1507a92f5149cfbf81dfdd6c4f6665aa817622ea --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d17a2af7141b1490eccd0c4eea9d376cfa565d37824a999697da2f1223a6864f +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f22ccfa9cb6ef869a647e1083677d56987b80f8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3fa0c65321844d2ce0ce65de3411fc9973f3e327e4e89ae4e8bcc47154dc08b8 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8459674d5176bf1de6e3e48ebdb88d1c08100ceb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de1c58d4f94c2dd9e1c44f5bdcc3b6161fa2d62e7556033df0903635c4e1b80f +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43eec646f564dc9e76d5e8f3171c89a45c056565 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcb2a4efc3e314ebb2f3cfcf6b8de4f301096f2cba69514fec3a1954e4286794 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f9873b05462fb31c872408c1fbc5fa06b1a5106 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8478ec52fe15cafbacba6a99ea433ac446dc52c8f7a424ad900dad132b6b710b +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8611ec147ba2721aab0a20b09c7a2b311ceaa65a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75c2cac370dd83bf6233c436aeffaac04de47b8a8a1f4f4c6e77b940ffe8b07 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfe9507b95e850e9c88a9948976528b6003497ee --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb0a4cb7bd6c2886133af02a6a8d0a4e62892916c1de4f3b2ff45873e8e612d +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..00fa8dd30af72b20b7d1f1266d652e4ab8b6d5e5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003881962508336e23d4fc3b713678d96db0284cf85af9600165a7e3bd006fd2 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2edd70e809603a7907dceeed982d3c475046fa6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec29ddcf52e54e9ec33eb915e80f653168bf5c89efb4d8b1422ab6b66635df8 +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..262e0ecfae1c1f9e8c14e118a094bd550a8b5d44 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c29a1f7426bafff0a760e962066b2fa7f441e039a0e792ab9f6f3c7c9d55eb6 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/latest new file mode 100644 index 0000000000000000000000000000000000000000..aad80f76777fd4d23b0b81026f4601524335cbe1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/latest @@ -0,0 +1 @@ +global_step114 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..80f51268a9828e9592a20d8ae8b2cd4ba4bc362c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194 +size 
15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..62e581603d525611f9660b6e859462f72bbc9258 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..18b03e632222a58e33ea4fca874b9c52628cc5e1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..41735b5f7ace13ffa57ebed3e7042f1a48ac17fb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebbb93c1d99b1645075ea27fc9fae66992a691f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0656f84b77a33c9ceba9df16f36437b55ef71bc7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..32b3a313372ee4a2eeaeed69789f8fb4e2c70ad0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_6.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c5a45264129fe1d7c409a6867de1a9751476a8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a86ac614a477eb67963adb2c8c07f37c79ded059 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7a9fd18bda7faa50931342147a7de5605bed0f91f6c70d821e84b7bf8f444f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a344889fbf9bdeda233e9fee88a06947ae3c4322 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/trainer_state.json @@ -0,0 +1,651 @@ 
+{ + "best_metric": 0.30639648, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854079581094561, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 9.88, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.089235 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.792355942314634, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.100830078125, + "memory(GiB)": 19.62, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.03759765625, + "rewards/margins": 0.01247406005859375, + "rewards/rejected": 0.0250244140625, + "step": 5, + "train_speed(iter/s)": 0.141028 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 6.728344330769066, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.524999976158142, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -620.7999877929688, + "logps/rejected": -703.2000122070312, + "loss": 2.03017578125, + "memory(GiB)": 46.15, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.0109374523162842, + "rewards/margins": 0.6689453125, + "rewards/rejected": 0.34417724609375, + "step": 10, + "train_speed(iter/s)": 0.141858 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": 
-1.828125, + "eval_logps/chosen": -256.0, + "eval_logps/rejected": -1096.0, + "eval_loss": 0.7890625, + "eval_nll_loss": 0.65625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.4375, + "eval_rewards/margins": 2.09375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.2882, + "eval_samples_per_second": 1.748, + "eval_steps_per_second": 0.437, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.3440116641074753, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.670312523841858, + "logps/chosen": -641.2000122070312, + "logps/rejected": -579.5999755859375, + "loss": 1.047705078125, + "memory(GiB)": 46.15, + "nll_loss": 0.864062488079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.159375190734863, + "rewards/margins": 2.5609374046325684, + "rewards/rejected": 1.6046874523162842, + "step": 15, + "train_speed(iter/s)": 0.146389 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.6177693098167876, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6515624523162842, + "logits/rejected": -1.734375, + "logps/chosen": -400.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.8362548828125, + "memory(GiB)": 46.15, + "nll_loss": 0.8031250238418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.362500190734863, + "rewards/margins": 4.474999904632568, + "rewards/rejected": 3.8843750953674316, + "step": 20, + "train_speed(iter/s)": 0.148317 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.8828125, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -179.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.388671875, + "eval_nll_loss": 0.376953125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 5.125, + "eval_rewards/rejected": 6.0, + "eval_runtime": 2.32, + "eval_samples_per_second": 1.724, + "eval_steps_per_second": 0.431, + "step": 20 + }, + { + "epoch": 
0.6578947368421053, + "grad_norm": 1.2977003336577986, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.646875023841858, + "logps/chosen": -560.0, + "logps/rejected": -661.5999755859375, + "loss": 0.57784423828125, + "memory(GiB)": 46.15, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.087499618530273, + "rewards/margins": 6.550000190734863, + "rewards/rejected": 4.537499904632568, + "step": 25, + "train_speed(iter/s)": 0.146297 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7551717618560747, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.545312523841858, + "logits/rejected": -1.78125, + "logps/chosen": -405.20001220703125, + "logps/rejected": -594.0, + "loss": 0.5139404296875, + "memory(GiB)": 46.15, + "nll_loss": 0.513671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.925000190734863, + "rewards/margins": 8.675000190734863, + "rewards/rejected": 3.2593750953674316, + "step": 30, + "train_speed(iter/s)": 0.147047 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.015625, + "eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -168.0, + "eval_logps/rejected": -1088.0, + "eval_loss": 0.347900390625, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.3125, + "eval_rewards/margins": 9.5625, + "eval_rewards/rejected": 2.75, + "eval_runtime": 2.3228, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.8190181064971851, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7468750476837158, + "logits/rejected": -1.735937476158142, + "logps/chosen": -620.4000244140625, + "logps/rejected": -643.5999755859375, + "loss": 0.56610107421875, + "memory(GiB)": 46.15, + "nll_loss": 0.565625011920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.862500190734863, + "rewards/margins": 
10.087499618530273, + "rewards/rejected": 3.7906250953674316, + "step": 35, + "train_speed(iter/s)": 0.145437 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7484729451276256, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7156250476837158, + "logps/chosen": -469.6000061035156, + "logps/rejected": -676.0, + "loss": 0.530419921875, + "memory(GiB)": 46.15, + "nll_loss": 0.582812488079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.425000190734863, + "rewards/margins": 11.5625, + "rewards/rejected": 2.8515625, + "step": 40, + "train_speed(iter/s)": 0.146795 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.75, + "eval_logps/chosen": -157.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.325927734375, + "eval_nll_loss": 0.326171875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.375, + "eval_rewards/margins": 13.0625, + "eval_rewards/rejected": 0.30078125, + "eval_runtime": 2.3449, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.513917830071538, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.610937476158142, + "logps/chosen": -441.6000061035156, + "logps/rejected": -712.4000244140625, + "loss": 0.444189453125, + "memory(GiB)": 47.61, + "nll_loss": 0.4437499940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.300000190734863, + "rewards/margins": 15.300000190734863, + "rewards/rejected": 0.00937500037252903, + "step": 45, + "train_speed(iter/s)": 0.145239 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4704081803690242, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.5890624523162842, + "logits/rejected": -1.7140624523162842, + "logps/chosen": -492.3999938964844, + "logps/rejected": -671.2000122070312, + "loss": 0.500921630859375, + 
"memory(GiB)": 47.61, + "nll_loss": 0.501171886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 16.200000762939453, + "rewards/rejected": 0.04252929612994194, + "step": 50, + "train_speed(iter/s)": 0.146472 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.65625, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.313720703125, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": -0.69921875, + "eval_runtime": 2.283, + "eval_samples_per_second": 1.752, + "eval_steps_per_second": 0.438, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.31799240114671307, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.5265624523162842, + "logps/chosen": -471.20001220703125, + "logps/rejected": -620.7999877929688, + "loss": 0.4777099609375, + "memory(GiB)": 47.61, + "nll_loss": 0.478515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.662500381469727, + "rewards/margins": 15.987500190734863, + "rewards/rejected": 0.690625011920929, + "step": 55, + "train_speed(iter/s)": 0.147538 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.20661664017833653, + "learning_rate": 5e-05, + "logits/chosen": -1.587499976158142, + "logits/rejected": -1.357812523841858, + "logps/chosen": -514.7999877929688, + "logps/rejected": -496.0, + "loss": 0.43349609375, + "memory(GiB)": 47.61, + "nll_loss": 0.43359375, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.049999237060547, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.6507812738418579, + "step": 60, + "train_speed(iter/s)": 0.148871 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.78125, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + 
"eval_loss": 0.31396484375, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 14.875, + "eval_rewards/rejected": -1.0, + "eval_runtime": 2.3335, + "eval_samples_per_second": 1.714, + "eval_steps_per_second": 0.429, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5101990627724774, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.490625023841858, + "logits/rejected": -1.5187499523162842, + "logps/chosen": -452.3999938964844, + "logps/rejected": -550.7999877929688, + "loss": 0.4079315185546875, + "memory(GiB)": 47.61, + "nll_loss": 0.408203125, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 17.399999618530273, + "rewards/rejected": -0.40937501192092896, + "step": 65, + "train_speed(iter/s)": 0.148763 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.40382497884369994, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.375, + "logits/rejected": -1.532812476158142, + "logps/chosen": -402.20001220703125, + "logps/rejected": -646.4000244140625, + "loss": 0.44562835693359376, + "memory(GiB)": 47.61, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.137500762939453, + "rewards/margins": 16.899999618530273, + "rewards/rejected": 0.24648436903953552, + "step": 70, + "train_speed(iter/s)": 0.14889 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.71875, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.312744140625, + "eval_nll_loss": 0.3125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 13.875, + "eval_rewards/rejected": 0.099609375, + "eval_runtime": 2.2858, + "eval_samples_per_second": 1.75, + "eval_steps_per_second": 0.437, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6457723772811964, + "learning_rate": 
2.886908691296504e-05, + "logits/chosen": -1.376562476158142, + "logits/rejected": -1.412500023841858, + "logps/chosen": -511.20001220703125, + "logps/rejected": -682.0, + "loss": 0.4864990234375, + "memory(GiB)": 47.61, + "nll_loss": 0.48710936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.375, + "rewards/margins": 16.825000762939453, + "rewards/rejected": 0.583203136920929, + "step": 75, + "train_speed(iter/s)": 0.148551 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5530412658330373, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.4093749523162842, + "logits/rejected": -1.4171874523162842, + "logps/chosen": -432.3999938964844, + "logps/rejected": -624.0, + "loss": 0.463671875, + "memory(GiB)": 47.61, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.875, + "rewards/margins": 17.512500762939453, + "rewards/rejected": -0.6333984136581421, + "step": 80, + "train_speed(iter/s)": 0.147818 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.69921875, + "eval_logits/rejected": -1.609375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.307373046875, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3551, + "eval_samples_per_second": 1.698, + "eval_steps_per_second": 0.425, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.4844426897846391, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.256250023841858, + "logits/rejected": -1.537500023841858, + "logps/chosen": -392.3999938964844, + "logps/rejected": -744.7999877929688, + "loss": 0.40716094970703126, + "memory(GiB)": 47.61, + "nll_loss": 0.40703123807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.162500381469727, + "rewards/margins": 15.800000190734863, + "rewards/rejected": 1.385156273841858, + "step": 85, + 
"train_speed(iter/s)": 0.147889 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.31612081336180875, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.4296875, + "logits/rejected": -1.2374999523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -590.0, + "loss": 0.4235595703125, + "memory(GiB)": 47.61, + "nll_loss": 0.423828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.549999237060547, + "rewards/margins": 16.887500762939453, + "rewards/rejected": 1.6085937023162842, + "step": 90, + "train_speed(iter/s)": 0.149275 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -0.6953125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3, + "eval_samples_per_second": 1.739, + "eval_steps_per_second": 0.435, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.24964156861715608, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.360937476158142, + "logits/rejected": -1.467187523841858, + "logps/chosen": -524.4000244140625, + "logps/rejected": -720.0, + "loss": 0.46888427734375, + "memory(GiB)": 47.61, + "nll_loss": 0.46875, + "rewards/accuracies": 1.0, + "rewards/chosen": 19.375, + "rewards/margins": 18.424999237060547, + "rewards/rejected": 0.9701172113418579, + "step": 95, + "train_speed(iter/s)": 0.148344 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.42005408649300036, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.392187476158142, + "logits/rejected": -1.546875, + "logps/chosen": -405.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.40069580078125, + "memory(GiB)": 56.86, + "nll_loss": 0.4007812440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.799999237060547, + 
"rewards/margins": 17.225000381469727, + "rewards/rejected": -0.4281249940395355, + "step": 100, + "train_speed(iter/s)": 0.149184 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -0.69140625, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.30712890625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.2948, + "eval_samples_per_second": 1.743, + "eval_steps_per_second": 0.436, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.5701405477605673, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.412500023841858, + "logits/rejected": -1.515625, + "logps/chosen": -432.6000061035156, + "logps/rejected": -567.2000122070312, + "loss": 0.4181304931640625, + "memory(GiB)": 56.86, + "nll_loss": 0.4183593690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.212499618530273, + "rewards/margins": 16.3125, + "rewards/rejected": 0.895703136920929, + "step": 105, + "train_speed(iter/s)": 0.148863 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.5114480987922484, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.3484375476837158, + "logits/rejected": -1.5343749523162842, + "logps/chosen": -452.3999938964844, + "logps/rejected": -605.5999755859375, + "loss": 0.3962158203125, + "memory(GiB)": 56.86, + "nll_loss": 0.396484375, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.850000381469727, + "rewards/margins": 17.637500762939453, + "rewards/rejected": 0.22343750298023224, + "step": 110, + "train_speed(iter/s)": 0.149183 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -0.6875, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.3076171875, + "eval_nll_loss": 0.30859375, + "eval_rewards/accuracies": 1.0, + 
"eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.323, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.43, + "step": 110 + }, + { + "epoch": 3.0, + "eval_logits/chosen": -0.6875, + "eval_logits/rejected": -1.59375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306884765625, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.2759, + "eval_samples_per_second": 1.758, + "eval_steps_per_second": 0.439, + "step": 114 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 359754023305216.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..910de15042b38b21d1ffd30fb26ec6176527a29d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca6d16ca5c664d1b07e14590d3d0bcd5bea63a33cc8b21067ece209aa1a26b0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c56e118080ac8fcfab07c714ee2f0d7ff595a8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "gate_proj", + "q_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b47027eeced1593736d5131cfff7cd2c67437a51 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1b8e0be53e98dac3fad5de553e1718c0bb40ee42db67c942c7aa7ed4557bac +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..1115c6d9c5a07f0a258efadd0ed2b11cdd05eeda --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e2fc14a31744889ce3d940af9bf95313f64e56d --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792a8df38f07181f122b1270d4c1713bf8b063398b324b9fd32866922469fe63 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0758e91c2d207fa55be83341b3ad6721949e2c14 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b8ba8c338e7a3ac6040d2f9e503aebbb856a9650d54de472139556ba1e1ec7 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c57d3e58bce6585fa6957576c5b1f2d8378ed2f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e12a61c0d27cc336467f43f751cfc77af852ac07431dc708bbe66f6b26f02797 +size 51613616 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5e8f928267f7f3df38411e83586a9124de54fe5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b18f15ab5dfd2d046f766945ccde3d59a58b1f36f0d130454331a385f2ba9ed +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b1f21549397e15b46c9760211bbb979266d034d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b443c52feecbd69238c62db152de863e83aedb5c38a47f31c6c79c307bf379 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..873f3e4278038eefa41b796c47af75057eb3ccc0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ebf6e4433b96263e2928ac152cb530b770d69350e5004ae445045410976a84b +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8851eee4dd9696613a7f3ecdbf09f6c8e5fa4190 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38bed7493dd5666a5e66bc073c6329a21125925085ed7011a871db512cf3977 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2342d32eb6cc393c89e8647dc558621e5a43d476 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:01df9cf5a63222e2b6986d7b82228c3d1f127e33079230403f05e874f94f8b0e +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..05e86de4a177fb97f0c143afaf9dcc4098b99be6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a132ec25e6c1d34867a625464c63159b473e9e36f82fb5ad09c21b0f0e2f0ac +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f9133a15655d28ff1e1f4efe91c31e7e23f8652 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d5c0caa1f5dc62ea83df7fabafe5a04998d7fc8bda5c0c096bc17d662d172e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5daf083fe068e189b1f8baafddbec11464dc22e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3560ed019a7041d0b4499958fd90fdf8b65fb1b9ffd6586fd02b6dedf0fd3e6 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a512bf6fde687517d099706ebfc3772cf0dd0b7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd85a56f951ff2f07deefe86539ea952c880679b1c4e2f9ee7c3ed56420987d +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c40d0dd137dc6a8b7a558a55ecba8ee4ead596d --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e677785727f2607581fef5a18b4352b48abdf0cfd722ec2ce42ad93eee2a43 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..82f1cd849c85a6b1686d68e80c752956384ff336 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1543250bde591b5d07d479b057fc29110efdeaec2a01eb5197442e26369a83 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..060173f536423c04902395d83daad4cabb4c0d1c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:becacd733d0ad2583d7f13e1391b1f851461e96d21383c9b925451006db3e502 +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe8dfdf82d58293ce827ea0289aa96663a6b6d89 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d35f02bb2d4604e9f127721a4072884330f4ef742c0a8fb81a39c810f2874b +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/latest new file mode 100644 index 0000000000000000000000000000000000000000..75eab498d0366633484ab40334e4b8fb92b16dad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/latest @@ -0,0 +1 @@ +global_step80 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b8b4067e4559b34f9b554c4963fe80d7f5fe839 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4c26c615bd5830d41566fab54dc69174be292761b34514b27fbe82b45b630b +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0265e51b5761ac9b323aa87ba00ba14b97e202 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c761d7f9b90c29c2d348a1133fd39be52c65e6bee4c2d179f6a6e564eb3a40 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5dd5aabcd6e7332f14a4796d6ec6c758e10aea0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccab847cc956e055fd3f9dcce06898826d065211e945b83576c8d487f87c5469 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcbdea3a573c2b7717f23e2ea0e4a6da6670d65d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e5f1dbdcf6ec820c22fd1e4258fcd7af2a2bce65c480988d3f111aa574c9c06 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cdcb8d1710063a6c30dec635b4c44e3cb6cd24e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a23184c3e806d2649776427d1da2c0c9137f9b23a84468f3bdd5bbc75f696c9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e39323a662c284cd109b5ce8c39e8a0ce375f2c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382fc01b809542bf6f5e26742e3e19e80a1f189ac5de24cf8cd822e303916b83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ce685d2e57181f70debfb25eb90cb76ceaf47da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b178265c7d2ae07bff10b7312e5e49b9f5b4914c38969d2f64a6ca006296bca +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e363b8083cdd817e0b3a2e6fd1b65a905e189b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668825a859126c4cf32afb883895c91004130b6aee02178736ca2840e5429ad0 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaf96d6803aea265d756d902db3c4cc2386f9742 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90524bcdb94734ac7120e4205110f14662bff8cee00eed50355875dcdc538029 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..66e4fc024a5f4597042e25ed5b83976a96001d15 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/trainer_state.json @@ -0,0 +1,475 @@ +{ + "best_metric": 0.30737305, + "best_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80", + "epoch": 2.1052631578947367, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854079581094561, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 9.88, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.089235 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.792355942314634, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.100830078125, + "memory(GiB)": 19.62, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.03759765625, + "rewards/margins": 0.01247406005859375, + "rewards/rejected": 0.0250244140625, + "step": 5, + "train_speed(iter/s)": 0.141028 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 6.728344330769066, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.524999976158142, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -620.7999877929688, + "logps/rejected": -703.2000122070312, + "loss": 2.03017578125, + "memory(GiB)": 46.15, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.0109374523162842, + "rewards/margins": 0.6689453125, + "rewards/rejected": 0.34417724609375, + "step": 10, + "train_speed(iter/s)": 0.141858 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + 
"eval_logps/rejected": -1096.0, + "eval_loss": 0.7890625, + "eval_nll_loss": 0.65625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.4375, + "eval_rewards/margins": 2.09375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.2882, + "eval_samples_per_second": 1.748, + "eval_steps_per_second": 0.437, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.3440116641074753, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.670312523841858, + "logps/chosen": -641.2000122070312, + "logps/rejected": -579.5999755859375, + "loss": 1.047705078125, + "memory(GiB)": 46.15, + "nll_loss": 0.864062488079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.159375190734863, + "rewards/margins": 2.5609374046325684, + "rewards/rejected": 1.6046874523162842, + "step": 15, + "train_speed(iter/s)": 0.146389 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.6177693098167876, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6515624523162842, + "logits/rejected": -1.734375, + "logps/chosen": -400.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.8362548828125, + "memory(GiB)": 46.15, + "nll_loss": 0.8031250238418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.362500190734863, + "rewards/margins": 4.474999904632568, + "rewards/rejected": 3.8843750953674316, + "step": 20, + "train_speed(iter/s)": 0.148317 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.8828125, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -179.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.388671875, + "eval_nll_loss": 0.376953125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 5.125, + "eval_rewards/rejected": 6.0, + "eval_runtime": 2.32, + "eval_samples_per_second": 1.724, + "eval_steps_per_second": 0.431, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 
1.2977003336577986, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.646875023841858, + "logps/chosen": -560.0, + "logps/rejected": -661.5999755859375, + "loss": 0.57784423828125, + "memory(GiB)": 46.15, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.087499618530273, + "rewards/margins": 6.550000190734863, + "rewards/rejected": 4.537499904632568, + "step": 25, + "train_speed(iter/s)": 0.146297 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7551717618560747, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.545312523841858, + "logits/rejected": -1.78125, + "logps/chosen": -405.20001220703125, + "logps/rejected": -594.0, + "loss": 0.5139404296875, + "memory(GiB)": 46.15, + "nll_loss": 0.513671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.925000190734863, + "rewards/margins": 8.675000190734863, + "rewards/rejected": 3.2593750953674316, + "step": 30, + "train_speed(iter/s)": 0.147047 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.015625, + "eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -168.0, + "eval_logps/rejected": -1088.0, + "eval_loss": 0.347900390625, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.3125, + "eval_rewards/margins": 9.5625, + "eval_rewards/rejected": 2.75, + "eval_runtime": 2.3228, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.8190181064971851, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7468750476837158, + "logits/rejected": -1.735937476158142, + "logps/chosen": -620.4000244140625, + "logps/rejected": -643.5999755859375, + "loss": 0.56610107421875, + "memory(GiB)": 46.15, + "nll_loss": 0.565625011920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.862500190734863, + "rewards/margins": 10.087499618530273, + "rewards/rejected": 
3.7906250953674316, + "step": 35, + "train_speed(iter/s)": 0.145437 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7484729451276256, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7156250476837158, + "logps/chosen": -469.6000061035156, + "logps/rejected": -676.0, + "loss": 0.530419921875, + "memory(GiB)": 46.15, + "nll_loss": 0.582812488079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.425000190734863, + "rewards/margins": 11.5625, + "rewards/rejected": 2.8515625, + "step": 40, + "train_speed(iter/s)": 0.146795 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.75, + "eval_logps/chosen": -157.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.325927734375, + "eval_nll_loss": 0.326171875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.375, + "eval_rewards/margins": 13.0625, + "eval_rewards/rejected": 0.30078125, + "eval_runtime": 2.3449, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.513917830071538, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.610937476158142, + "logps/chosen": -441.6000061035156, + "logps/rejected": -712.4000244140625, + "loss": 0.444189453125, + "memory(GiB)": 47.61, + "nll_loss": 0.4437499940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.300000190734863, + "rewards/margins": 15.300000190734863, + "rewards/rejected": 0.00937500037252903, + "step": 45, + "train_speed(iter/s)": 0.145239 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4704081803690242, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.5890624523162842, + "logits/rejected": -1.7140624523162842, + "logps/chosen": -492.3999938964844, + "logps/rejected": -671.2000122070312, + "loss": 0.500921630859375, + "memory(GiB)": 47.61, + "nll_loss": 
0.501171886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 16.200000762939453, + "rewards/rejected": 0.04252929612994194, + "step": 50, + "train_speed(iter/s)": 0.146472 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.65625, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.313720703125, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": -0.69921875, + "eval_runtime": 2.283, + "eval_samples_per_second": 1.752, + "eval_steps_per_second": 0.438, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.31799240114671307, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.5265624523162842, + "logps/chosen": -471.20001220703125, + "logps/rejected": -620.7999877929688, + "loss": 0.4777099609375, + "memory(GiB)": 47.61, + "nll_loss": 0.478515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.662500381469727, + "rewards/margins": 15.987500190734863, + "rewards/rejected": 0.690625011920929, + "step": 55, + "train_speed(iter/s)": 0.147538 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.20661664017833653, + "learning_rate": 5e-05, + "logits/chosen": -1.587499976158142, + "logits/rejected": -1.357812523841858, + "logps/chosen": -514.7999877929688, + "logps/rejected": -496.0, + "loss": 0.43349609375, + "memory(GiB)": 47.61, + "nll_loss": 0.43359375, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.049999237060547, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.6507812738418579, + "step": 60, + "train_speed(iter/s)": 0.148871 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.78125, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.31396484375, + 
"eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 14.875, + "eval_rewards/rejected": -1.0, + "eval_runtime": 2.3335, + "eval_samples_per_second": 1.714, + "eval_steps_per_second": 0.429, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5101990627724774, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.490625023841858, + "logits/rejected": -1.5187499523162842, + "logps/chosen": -452.3999938964844, + "logps/rejected": -550.7999877929688, + "loss": 0.4079315185546875, + "memory(GiB)": 47.61, + "nll_loss": 0.408203125, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 17.399999618530273, + "rewards/rejected": -0.40937501192092896, + "step": 65, + "train_speed(iter/s)": 0.148763 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.40382497884369994, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.375, + "logits/rejected": -1.532812476158142, + "logps/chosen": -402.20001220703125, + "logps/rejected": -646.4000244140625, + "loss": 0.44562835693359376, + "memory(GiB)": 47.61, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.137500762939453, + "rewards/margins": 16.899999618530273, + "rewards/rejected": 0.24648436903953552, + "step": 70, + "train_speed(iter/s)": 0.14889 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.71875, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.312744140625, + "eval_nll_loss": 0.3125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 13.875, + "eval_rewards/rejected": 0.099609375, + "eval_runtime": 2.2858, + "eval_samples_per_second": 1.75, + "eval_steps_per_second": 0.437, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6457723772811964, + "learning_rate": 2.886908691296504e-05, + 
"logits/chosen": -1.376562476158142, + "logits/rejected": -1.412500023841858, + "logps/chosen": -511.20001220703125, + "logps/rejected": -682.0, + "loss": 0.4864990234375, + "memory(GiB)": 47.61, + "nll_loss": 0.48710936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.375, + "rewards/margins": 16.825000762939453, + "rewards/rejected": 0.583203136920929, + "step": 75, + "train_speed(iter/s)": 0.148551 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5530412658330373, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.4093749523162842, + "logits/rejected": -1.4171874523162842, + "logps/chosen": -432.3999938964844, + "logps/rejected": -624.0, + "loss": 0.463671875, + "memory(GiB)": 47.61, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.875, + "rewards/margins": 17.512500762939453, + "rewards/rejected": -0.6333984136581421, + "step": 80, + "train_speed(iter/s)": 0.147818 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.69921875, + "eval_logits/rejected": -1.609375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.307373046875, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3551, + "eval_samples_per_second": 1.698, + "eval_steps_per_second": 0.425, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 254195476463616.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..910de15042b38b21d1ffd30fb26ec6176527a29d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca6d16ca5c664d1b07e14590d3d0bcd5bea63a33cc8b21067ece209aa1a26b0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-80/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/README.md b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c5c0404813bb3d884c2d0750e24391042738c029 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-14b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c56e118080ac8fcfab07c714ee2f0d7ff595a8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "gate_proj", + "q_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dabacd92e9c183c31cce9578384bc5054b2e3b54 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:731bdd07b5dc0d86ddb32b109ab07bc977d48b513072d8bb062abfaa0ba0309c +size 68902296 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/args.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..1115c6d9c5a07f0a258efadd0ed2b11cdd05eeda --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-14b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-14b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08d764ddcf7a2b80ab0f2334ecfbe152580c01e5 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ebbd5977048169508f4d06fe5aec48d7cf18a1b05dcc970be2992d35385cb50 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c45b3834ae32d64206c01a86e2e69b698ead34bc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14a6fbdc27ed97a1432fe9928c3ff3004dd8f9f0582feb98fa26f336b794c387 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a69583a2ea8298459a84badf373cf16e2555d6c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3def50568c42c0a17b6956b9ce1fec43d37b1017e13bbd249712cc7e6fe975 +size 51613616 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e59bc1e11d571e1147d56e10178413e52790a940 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1e317385caa815bdf6f202cb0fea0b22d0fc559086ab4e759dc6d59f7660d44 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5e299b89130ec1e77f9df2326222c86f7ecb8ba --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e9ee508531ceb16b328d25dffea63f32a3e693bdfc5e42ff52bbcf8a30c01e +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0c026dfbf00ff408c474b7cf4dafaa282f79e07b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa6294383599c8d7a47e055ecbb84fa296c444018beedb1e40e6e112942b682 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16c1ed333d64b146be1b75f61cb75a6231a98063 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e05bf54aba63c3e516abf247ea47e81fb42fad7f6409629f00df612b4e9479 +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7980c667b19eeb2b3d1c19bc1f2cd4eb47a5481 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:14ac77d01398358de94f9744b43680d4f883bcbe143af322e19575defa29c71f +size 51613616 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..902240eb87a78d637ac373831cba6ecc56153534 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb13080749a65b63ac65449f8a3f8a01f562a1f39f2452c48c7f588bd7eea0b5 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a2b9a1a41ac2c8e9aaf22b9aee01cbd96316793 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77bf7c00e4e8f27e1386a6f463f82e22dacae418773442295c7a6150bc534669 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fbefb263b3b25ecf78e50e78f6b0447edc6cf2b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d6bb166980243cd44320fdc36511999b08dd074ec072b15074e0abbbb346c73 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3db4911aec6a0ff6d91acb611346193573b4a94 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aacb2c7e389a7616caa21409777bd17a902bc249d0502d51b0bd0e09cf068b20 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4ad54f556a4fbf8a5343c33efc1c6e61edc1c3f --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a10f63355732c06e45b15116f04748b05efd74f13d532c707ef1fae6235e8e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3c4f7c7524f85b80de1cb6e8214b913ea994af6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215c98b528870000bd6c383dac4fb22ef4a0957ec7e406109e6ba1600dc8a7d8 +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b68ee8493c80a9bb16791efbc63e05a5451e05ed --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f66c8cbab676566ba8762a4597e92092a9ad8f768c647508c0838e5a2575003b +size 664974 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8582ec2511fec325a8fc968e30b3c7857cc4e428 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d075470e2153866bfc1cc14323d4495949224f9c9651b5a756c88018ae56a3e +size 664974 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/latest b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/latest new file mode 100644 index 0000000000000000000000000000000000000000..8e7a337e2cb23bf07023d223dd647df2d25f0fc1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/latest @@ -0,0 +1 @@ +global_step90 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e31a2394e12bf431ae13288c3d90fe4727f07fa7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1db0a0f44aa3ac1d82c3bf8dc2d8968eeba4ce7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..75de18f57a056bd6a5f89df1abd045678f3f919e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd100693bc9f3267d044ce4a16e702502dc03ec --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5aeeabfe119f1cb0c8c804f1b9a4d3049f478d69 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..91fe0f42382ab06f4d26d753745a914c9e46100e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5830ca6bd04645962b6e56a00a91cd8349ca449c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..343d1c0475f0dc64100dc67b09195e047f1a7bcf --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a39c9cfeaa2d69cb5a66e83272eee65ddffaed5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b768777109679597db5d1fa24a743962bede33623e22702b13b95eab2d42cb8 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0ca1ac0e6d579b5cc6c0a72af8d540edfc8f2f2f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/trainer_state.json @@ -0,0 +1,528 @@ +{ + "best_metric": 0.30639648, + "best_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90", + "epoch": 2.3684210526315788, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 9.854079581094561, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -2.15625, + "logits/rejected": -1.40625, + "logps/chosen": -704.0, + "logps/rejected": -416.0, + "loss": 1.337890625, + "memory(GiB)": 9.88, + "nll_loss": 0.6484375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.089235 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.792355942314634, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.595703125, + "logits/rejected": -1.576171875, + "logps/chosen": -721.0, + "logps/rejected": -575.5, + "loss": 2.100830078125, + "memory(GiB)": 19.62, + "nll_loss": 1.4130859375, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.03759765625, + "rewards/margins": 0.01247406005859375, + "rewards/rejected": 0.0250244140625, + "step": 5, + "train_speed(iter/s)": 0.141028 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 6.728344330769066, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.524999976158142, + "logits/rejected": -1.6375000476837158, + "logps/chosen": -620.7999877929688, + "logps/rejected": -703.2000122070312, + "loss": 2.03017578125, + "memory(GiB)": 46.15, + "nll_loss": 1.5515625476837158, + "rewards/accuracies": 0.75, + "rewards/chosen": 1.0109374523162842, + "rewards/margins": 0.6689453125, + "rewards/rejected": 0.34417724609375, + "step": 10, + "train_speed(iter/s)": 0.141858 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -0.9375, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -256.0, + 
"eval_logps/rejected": -1096.0, + "eval_loss": 0.7890625, + "eval_nll_loss": 0.65625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 3.4375, + "eval_rewards/margins": 2.09375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 2.2882, + "eval_samples_per_second": 1.748, + "eval_steps_per_second": 0.437, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.3440116641074753, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.670312523841858, + "logps/chosen": -641.2000122070312, + "logps/rejected": -579.5999755859375, + "loss": 1.047705078125, + "memory(GiB)": 46.15, + "nll_loss": 0.864062488079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 4.159375190734863, + "rewards/margins": 2.5609374046325684, + "rewards/rejected": 1.6046874523162842, + "step": 15, + "train_speed(iter/s)": 0.146389 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 1.6177693098167876, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6515624523162842, + "logits/rejected": -1.734375, + "logps/chosen": -400.3999938964844, + "logps/rejected": -564.4000244140625, + "loss": 0.8362548828125, + "memory(GiB)": 46.15, + "nll_loss": 0.8031250238418579, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.362500190734863, + "rewards/margins": 4.474999904632568, + "rewards/rejected": 3.8843750953674316, + "step": 20, + "train_speed(iter/s)": 0.148317 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -0.8828125, + "eval_logits/rejected": -1.828125, + "eval_logps/chosen": -179.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.388671875, + "eval_nll_loss": 0.376953125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 5.125, + "eval_rewards/rejected": 6.0, + "eval_runtime": 2.32, + "eval_samples_per_second": 1.724, + "eval_steps_per_second": 0.431, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 
1.2977003336577986, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.646875023841858, + "logps/chosen": -560.0, + "logps/rejected": -661.5999755859375, + "loss": 0.57784423828125, + "memory(GiB)": 46.15, + "nll_loss": 0.571093738079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.087499618530273, + "rewards/margins": 6.550000190734863, + "rewards/rejected": 4.537499904632568, + "step": 25, + "train_speed(iter/s)": 0.146297 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.7551717618560747, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.545312523841858, + "logits/rejected": -1.78125, + "logps/chosen": -405.20001220703125, + "logps/rejected": -594.0, + "loss": 0.5139404296875, + "memory(GiB)": 46.15, + "nll_loss": 0.513671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.925000190734863, + "rewards/margins": 8.675000190734863, + "rewards/rejected": 3.2593750953674316, + "step": 30, + "train_speed(iter/s)": 0.147047 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.015625, + "eval_logits/rejected": -1.8203125, + "eval_logps/chosen": -168.0, + "eval_logps/rejected": -1088.0, + "eval_loss": 0.347900390625, + "eval_nll_loss": 0.34765625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 12.3125, + "eval_rewards/margins": 9.5625, + "eval_rewards/rejected": 2.75, + "eval_runtime": 2.3228, + "eval_samples_per_second": 1.722, + "eval_steps_per_second": 0.431, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.8190181064971851, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7468750476837158, + "logits/rejected": -1.735937476158142, + "logps/chosen": -620.4000244140625, + "logps/rejected": -643.5999755859375, + "loss": 0.56610107421875, + "memory(GiB)": 46.15, + "nll_loss": 0.565625011920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.862500190734863, + "rewards/margins": 10.087499618530273, + "rewards/rejected": 
3.7906250953674316, + "step": 35, + "train_speed(iter/s)": 0.145437 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7484729451276256, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7156250476837158, + "logps/chosen": -469.6000061035156, + "logps/rejected": -676.0, + "loss": 0.530419921875, + "memory(GiB)": 46.15, + "nll_loss": 0.582812488079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.425000190734863, + "rewards/margins": 11.5625, + "rewards/rejected": 2.8515625, + "step": 40, + "train_speed(iter/s)": 0.146795 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.0078125, + "eval_logits/rejected": -1.75, + "eval_logps/chosen": -157.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.325927734375, + "eval_nll_loss": 0.326171875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.375, + "eval_rewards/margins": 13.0625, + "eval_rewards/rejected": 0.30078125, + "eval_runtime": 2.3449, + "eval_samples_per_second": 1.706, + "eval_steps_per_second": 0.426, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.513917830071538, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.610937476158142, + "logps/chosen": -441.6000061035156, + "logps/rejected": -712.4000244140625, + "loss": 0.444189453125, + "memory(GiB)": 47.61, + "nll_loss": 0.4437499940395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.300000190734863, + "rewards/margins": 15.300000190734863, + "rewards/rejected": 0.00937500037252903, + "step": 45, + "train_speed(iter/s)": 0.145239 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4704081803690242, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.5890624523162842, + "logits/rejected": -1.7140624523162842, + "logps/chosen": -492.3999938964844, + "logps/rejected": -671.2000122070312, + "loss": 0.500921630859375, + "memory(GiB)": 47.61, + "nll_loss": 
0.501171886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 16.200000762939453, + "rewards/rejected": 0.04252929612994194, + "step": 50, + "train_speed(iter/s)": 0.146472 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.0, + "eval_logits/rejected": -1.65625, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.313720703125, + "eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": -0.69921875, + "eval_runtime": 2.283, + "eval_samples_per_second": 1.752, + "eval_steps_per_second": 0.438, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.31799240114671307, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.443750023841858, + "logits/rejected": -1.5265624523162842, + "logps/chosen": -471.20001220703125, + "logps/rejected": -620.7999877929688, + "loss": 0.4777099609375, + "memory(GiB)": 47.61, + "nll_loss": 0.478515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.662500381469727, + "rewards/margins": 15.987500190734863, + "rewards/rejected": 0.690625011920929, + "step": 55, + "train_speed(iter/s)": 0.147538 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.20661664017833653, + "learning_rate": 5e-05, + "logits/chosen": -1.587499976158142, + "logits/rejected": -1.357812523841858, + "logps/chosen": -514.7999877929688, + "logps/rejected": -496.0, + "loss": 0.43349609375, + "memory(GiB)": 47.61, + "nll_loss": 0.43359375, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.049999237060547, + "rewards/margins": 16.424999237060547, + "rewards/rejected": 0.6507812738418579, + "step": 60, + "train_speed(iter/s)": 0.148871 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -0.78125, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -152.0, + "eval_logps/rejected": -1120.0, + "eval_loss": 0.31396484375, + 
"eval_nll_loss": 0.314453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.875, + "eval_rewards/margins": 14.875, + "eval_rewards/rejected": -1.0, + "eval_runtime": 2.3335, + "eval_samples_per_second": 1.714, + "eval_steps_per_second": 0.429, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.5101990627724774, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.490625023841858, + "logits/rejected": -1.5187499523162842, + "logps/chosen": -452.3999938964844, + "logps/rejected": -550.7999877929688, + "loss": 0.4079315185546875, + "memory(GiB)": 47.61, + "nll_loss": 0.408203125, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.987499237060547, + "rewards/margins": 17.399999618530273, + "rewards/rejected": -0.40937501192092896, + "step": 65, + "train_speed(iter/s)": 0.148763 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.40382497884369994, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.375, + "logits/rejected": -1.532812476158142, + "logps/chosen": -402.20001220703125, + "logps/rejected": -646.4000244140625, + "loss": 0.44562835693359376, + "memory(GiB)": 47.61, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.137500762939453, + "rewards/margins": 16.899999618530273, + "rewards/rejected": 0.24648436903953552, + "step": 70, + "train_speed(iter/s)": 0.14889 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -0.71875, + "eval_logits/rejected": -1.6484375, + "eval_logps/chosen": -151.0, + "eval_logps/rejected": -1112.0, + "eval_loss": 0.312744140625, + "eval_nll_loss": 0.3125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.9375, + "eval_rewards/margins": 13.875, + "eval_rewards/rejected": 0.099609375, + "eval_runtime": 2.2858, + "eval_samples_per_second": 1.75, + "eval_steps_per_second": 0.437, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.6457723772811964, + "learning_rate": 2.886908691296504e-05, + 
"logits/chosen": -1.376562476158142, + "logits/rejected": -1.412500023841858, + "logps/chosen": -511.20001220703125, + "logps/rejected": -682.0, + "loss": 0.4864990234375, + "memory(GiB)": 47.61, + "nll_loss": 0.48710936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.375, + "rewards/margins": 16.825000762939453, + "rewards/rejected": 0.583203136920929, + "step": 75, + "train_speed(iter/s)": 0.148551 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5530412658330373, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.4093749523162842, + "logits/rejected": -1.4171874523162842, + "logps/chosen": -432.3999938964844, + "logps/rejected": -624.0, + "loss": 0.463671875, + "memory(GiB)": 47.61, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.875, + "rewards/margins": 17.512500762939453, + "rewards/rejected": -0.6333984136581421, + "step": 80, + "train_speed(iter/s)": 0.147818 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -0.69921875, + "eval_logits/rejected": -1.609375, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.307373046875, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.125, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3551, + "eval_samples_per_second": 1.698, + "eval_steps_per_second": 0.425, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.4844426897846391, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.256250023841858, + "logits/rejected": -1.537500023841858, + "logps/chosen": -392.3999938964844, + "logps/rejected": -744.7999877929688, + "loss": 0.40716094970703126, + "memory(GiB)": 47.61, + "nll_loss": 0.40703123807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.162500381469727, + "rewards/margins": 15.800000190734863, + "rewards/rejected": 1.385156273841858, + "step": 85, + "train_speed(iter/s)": 
0.147889 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.31612081336180875, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.4296875, + "logits/rejected": -1.2374999523162842, + "logps/chosen": -483.20001220703125, + "logps/rejected": -590.0, + "loss": 0.4235595703125, + "memory(GiB)": 47.61, + "nll_loss": 0.423828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 18.549999237060547, + "rewards/margins": 16.887500762939453, + "rewards/rejected": 1.6085937023162842, + "step": 90, + "train_speed(iter/s)": 0.149275 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -0.6953125, + "eval_logits/rejected": -1.6015625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1104.0, + "eval_loss": 0.306396484375, + "eval_nll_loss": 0.306640625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.796875, + "eval_runtime": 2.3, + "eval_samples_per_second": 1.739, + "eval_steps_per_second": 0.435, + "step": 90 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 283981896744960.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..910de15042b38b21d1ffd30fb26ec6176527a29d --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca6d16ca5c664d1b07e14590d3d0bcd5bea63a33cc8b21067ece209aa1a26b0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..447a19a5cd663f443282db018ceb5764a991e648 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..7e44058caafaaae59a818ec0ebc037b1cdcc8e27 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..d83f838908702984efc05b8641b296d537de6561 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..1a4a3c9a4c59c4feb50ffc5d1ac627459c66f0f4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..ed232fd904bc1087b4226b7fa2d42b5386d68a05 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..605e8b6ecbc58e4adfb55af84b003dfbfedc7a70 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_accuracies.png new file mode 100644 index 
0000000000000000000000000000000000000000..2d2698dd0f6f8ad18db2b94a0c23ae94df643a78 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..b3e01419035969a02472795a39824c0e15c64d4c Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..923a56c124b9e9d95004b9b469a73498ad6569ca Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..b68b9cfb8af9f153068556902d3f88a83c6b02b5 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_runtime.png 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..f7a0ba34b0cd63904e90fdd4c00ce666bef36f1c Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..8c317d2613b1e2aa599ba38d942d3d17bdc7b819 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..f849d3aabf82df3212a27782790782470d2ce53d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/eval_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_epoch.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_epoch.png new file mode 100644 index 0000000000000000000000000000000000000000..c152c9bbb1100d680125e899d3d0da3db98dc221 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_epoch.png differ diff --git 
a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_grad_norm.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_grad_norm.png new file mode 100644 index 0000000000000000000000000000000000000000..b0b477873abd4a4a453662d3a6e4d9887db4fd44 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_grad_norm.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_learning_rate.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..e35e42c173f451f9666ba448872c615528441c9e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_learning_rate.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..cf42357c4ec97c0d36f2275d2139dcc3248623fa Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..4883dadec289f5b2dcb3f98ee56b940846117393 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..2755c6c79835dbcdc7104b3f442fc338f2fde638 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..50a1cd23a7876f8d6ce4a5d581c01170a8583a50 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..97b8789e8ed1e61d5154b4bc0f7e93b69bce85bf Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_memory(GiB).png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_memory(GiB).png new file mode 100644 
index 0000000000000000000000000000000000000000..573ed603524e7b90a5463b10b8d384313b6250af Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_memory(GiB).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..7dc88f19f2d81af9230bcdb616c3006230073a3c Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..ec4734c9cd108b28eb7184185d8b88309e9d83f5 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..e50ed9ceaf44acf6fb345d4435082850627c5c0e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_margins.png 
b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..05e58783c5b749044da260b19775a63e2eb8084c Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..234c49cfbf0ddd0e05f0929387ce2d561e3a0bf9 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_total_flos.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_total_flos.png new file mode 100644 index 0000000000000000000000000000000000000000..c5fecc3a65f915cc9bfb03a3da5259e0afccbfa4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_total_flos.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_loss.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..dffa3be6a754f69b7976741b300d4d4363e09b63 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_loss.png 
differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_runtime.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..f9d9c9662829759277624a2cecdc2ec7459e3d99 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..dfb42639b5efe31a7c5a81ffe15ed5c6c4b3a084 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_speed(iter_s).png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_speed(iter_s).png new file mode 100644 index 0000000000000000000000000000000000000000..5d8cb12c3153cc3153386986d096290961c346e1 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_speed(iter_s).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_steps_per_second.png new file mode 100644 index 
0000000000000000000000000000000000000000..af6310b322c918247b5989331bfd4a714a7bce6e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/images/train_train_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/logging.jsonl b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/logging.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1cc7a011f81d1b52097ca6a52bab56ca3b642b79 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/logging.jsonl @@ -0,0 +1,37 @@ +{"loss": 1.33789062, "grad_norm": 9.85407958, "learning_rate": 1.667e-05, "memory(GiB)": 9.88, "train_speed(iter/s)": 0.089235, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -704.0, "logps/rejected": -416.0, "logits/chosen": -2.15625, "logits/rejected": -1.40625, "nll_loss": 0.6484375, "epoch": 0.02631579, "global_step/max_steps": "1/114", "percentage": "0.88%", "elapsed_time": "7s", "remaining_time": "14m 19s"} +{"loss": 2.10083008, "grad_norm": 14.79235594, "learning_rate": 8.333e-05, "memory(GiB)": 19.62, "train_speed(iter/s)": 0.141028, "rewards/chosen": 0.03759766, "rewards/rejected": 0.02502441, "rewards/accuracies": 0.1875, "rewards/margins": 0.01247406, "logps/chosen": -721.0, "logps/rejected": -575.5, "logits/chosen": -1.59570312, "logits/rejected": -1.57617188, "nll_loss": 1.41308594, "epoch": 0.13157895, "global_step/max_steps": "5/114", "percentage": "4.39%", "elapsed_time": "31s", "remaining_time": "11m 34s"} +{"loss": 2.03017578, "grad_norm": 6.72834433, "learning_rate": 9.966e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.141858, "rewards/chosen": 1.01093745, "rewards/rejected": 0.34417725, "rewards/accuracies": 0.75, "rewards/margins": 0.66894531, 
"logps/chosen": -620.79998779, "logps/rejected": -703.20001221, "logits/chosen": -1.52499998, "logits/rejected": -1.63750005, "nll_loss": 1.55156255, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "1m 6s", "remaining_time": "11m 35s"} +{"eval_loss": 0.7890625, "eval_runtime": 2.2882, "eval_samples_per_second": 1.748, "eval_steps_per_second": 0.437, "eval_rewards/chosen": 3.4375, "eval_rewards/rejected": 1.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 2.09375, "eval_logps/chosen": -256.0, "eval_logps/rejected": -1096.0, "eval_logits/chosen": -0.9375, "eval_logits/rejected": -1.828125, "eval_nll_loss": 0.65625, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "1m 9s", "remaining_time": "11m 59s"} +{"loss": 1.04770508, "grad_norm": 2.34401166, "learning_rate": 9.83e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.146389, "rewards/chosen": 4.15937519, "rewards/rejected": 1.60468745, "rewards/accuracies": 0.92500001, "rewards/margins": 2.5609374, "logps/chosen": -641.20001221, "logps/rejected": -579.59997559, "logits/chosen": -1.71718752, "logits/rejected": -1.67031252, "nll_loss": 0.86406249, "epoch": 0.39473684, "global_step/max_steps": "15/114", "percentage": "13.16%", "elapsed_time": "1m 38s", "remaining_time": "10m 52s"} +{"loss": 0.83625488, "grad_norm": 1.61776931, "learning_rate": 9.591e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.148317, "rewards/chosen": 8.36250019, "rewards/rejected": 3.8843751, "rewards/accuracies": 1.0, "rewards/margins": 4.4749999, "logps/chosen": -400.3999939, "logps/rejected": -564.40002441, "logits/chosen": -1.65156245, "logits/rejected": -1.734375, "nll_loss": 0.80312502, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "2m 11s", "remaining_time": "10m 16s"} +{"eval_loss": 0.38867188, "eval_runtime": 2.32, "eval_samples_per_second": 1.724, "eval_steps_per_second": 
0.431, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 6.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 5.125, "eval_logps/chosen": -179.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -0.8828125, "eval_logits/rejected": -1.828125, "eval_nll_loss": 0.37695312, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "2m 13s", "remaining_time": "10m 27s"} +{"loss": 0.57784424, "grad_norm": 1.29770033, "learning_rate": 9.256e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.146297, "rewards/chosen": 11.08749962, "rewards/rejected": 4.5374999, "rewards/accuracies": 1.0, "rewards/margins": 6.55000019, "logps/chosen": -560.0, "logps/rejected": -661.59997559, "logits/chosen": -1.65312505, "logits/rejected": -1.64687502, "nll_loss": 0.57109374, "epoch": 0.65789474, "global_step/max_steps": "25/114", "percentage": "21.93%", "elapsed_time": "2m 47s", "remaining_time": "9m 55s"} +{"loss": 0.51394043, "grad_norm": 0.75517176, "learning_rate": 8.83e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.147047, "rewards/chosen": 11.92500019, "rewards/rejected": 3.2593751, "rewards/accuracies": 1.0, "rewards/margins": 8.67500019, "logps/chosen": -405.20001221, "logps/rejected": -594.0, "logits/chosen": -1.54531252, "logits/rejected": -1.78125, "nll_loss": 0.51367188, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "3m 20s", "remaining_time": "9m 21s"} +{"eval_loss": 0.34790039, "eval_runtime": 2.3228, "eval_samples_per_second": 1.722, "eval_steps_per_second": 0.431, "eval_rewards/chosen": 12.3125, "eval_rewards/rejected": 2.75, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.5625, "eval_logps/chosen": -168.0, "eval_logps/rejected": -1088.0, "eval_logits/chosen": -1.015625, "eval_logits/rejected": -1.8203125, "eval_nll_loss": 0.34765625, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "3m 22s", 
"remaining_time": "9m 27s"} +{"loss": 0.56610107, "grad_norm": 0.81901811, "learning_rate": 8.324e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.145437, "rewards/chosen": 13.86250019, "rewards/rejected": 3.7906251, "rewards/accuracies": 1.0, "rewards/margins": 10.08749962, "logps/chosen": -620.40002441, "logps/rejected": -643.59997559, "logits/chosen": -1.74687505, "logits/rejected": -1.73593748, "nll_loss": 0.56562501, "epoch": 0.92105263, "global_step/max_steps": "35/114", "percentage": "30.70%", "elapsed_time": "3m 57s", "remaining_time": "8m 55s"} +{"loss": 0.53041992, "grad_norm": 0.74847295, "learning_rate": 7.748e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.146795, "rewards/chosen": 14.42500019, "rewards/rejected": 2.8515625, "rewards/accuracies": 1.0, "rewards/margins": 11.5625, "logps/chosen": -469.6000061, "logps/rejected": -676.0, "logits/chosen": -1.68437505, "logits/rejected": -1.71562505, "nll_loss": 0.58281249, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "4m 28s", "remaining_time": "8m 17s"} +{"eval_loss": 0.32592773, "eval_runtime": 2.3449, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.426, "eval_rewards/chosen": 13.375, "eval_rewards/rejected": 0.30078125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.0625, "eval_logps/chosen": -157.0, "eval_logps/rejected": -1112.0, "eval_logits/chosen": -1.0078125, "eval_logits/rejected": -1.75, "eval_nll_loss": 0.32617188, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "4m 31s", "remaining_time": "8m 21s"} +{"loss": 0.44418945, "grad_norm": 0.51391783, "learning_rate": 7.113e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.145239, "rewards/chosen": 15.30000019, "rewards/rejected": 0.009375, "rewards/accuracies": 1.0, "rewards/margins": 15.30000019, "logps/chosen": -441.6000061, "logps/rejected": -712.40002441, "logits/chosen": -1.57968748, "logits/rejected": -1.61093748, 
"nll_loss": 0.44374999, "epoch": 1.18421053, "global_step/max_steps": "45/114", "percentage": "39.47%", "elapsed_time": "5m 6s", "remaining_time": "7m 49s"} +{"loss": 0.50092163, "grad_norm": 0.47040818, "learning_rate": 6.434e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.146472, "rewards/chosen": 16.22500038, "rewards/rejected": 0.0425293, "rewards/accuracies": 1.0, "rewards/margins": 16.20000076, "logps/chosen": -492.3999939, "logps/rejected": -671.20001221, "logits/chosen": -1.58906245, "logits/rejected": -1.71406245, "nll_loss": 0.50117189, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "5m 37s", "remaining_time": "7m 12s"} +{"eval_loss": 0.3137207, "eval_runtime": 2.283, "eval_samples_per_second": 1.752, "eval_steps_per_second": 0.438, "eval_rewards/chosen": 13.9375, "eval_rewards/rejected": -0.69921875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.625, "eval_logps/chosen": -152.0, "eval_logps/rejected": -1120.0, "eval_logits/chosen": -1.0, "eval_logits/rejected": -1.65625, "eval_nll_loss": 0.31445312, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "5m 40s", "remaining_time": "7m 15s"} +{"loss": 0.47770996, "grad_norm": 0.3179924, "learning_rate": 5.725e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.147538, "rewards/chosen": 16.66250038, "rewards/rejected": 0.69062501, "rewards/accuracies": 1.0, "rewards/margins": 15.98750019, "logps/chosen": -471.20001221, "logps/rejected": -620.79998779, "logits/chosen": -1.44375002, "logits/rejected": -1.52656245, "nll_loss": 0.47851562, "epoch": 1.44736842, "global_step/max_steps": "55/114", "percentage": "48.25%", "elapsed_time": "6m 9s", "remaining_time": "6m 36s"} +{"loss": 0.43349609, "grad_norm": 0.20661664, "learning_rate": 5e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148871, "rewards/chosen": 17.04999924, "rewards/rejected": 0.65078127, "rewards/accuracies": 1.0, "rewards/margins": 
16.42499924, "logps/chosen": -514.79998779, "logps/rejected": -496.0, "logits/chosen": -1.58749998, "logits/rejected": -1.35781252, "nll_loss": 0.43359375, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "6m 39s", "remaining_time": "5m 59s"} +{"eval_loss": 0.31396484, "eval_runtime": 2.3335, "eval_samples_per_second": 1.714, "eval_steps_per_second": 0.429, "eval_rewards/chosen": 13.875, "eval_rewards/rejected": -1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.875, "eval_logps/chosen": -152.0, "eval_logps/rejected": -1120.0, "eval_logits/chosen": -0.78125, "eval_logits/rejected": -1.6484375, "eval_nll_loss": 0.31445312, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "6m 41s", "remaining_time": "6m 1s"} +{"loss": 0.40793152, "grad_norm": 0.51019906, "learning_rate": 4.275e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148763, "rewards/chosen": 16.98749924, "rewards/rejected": -0.40937501, "rewards/accuracies": 1.0, "rewards/margins": 17.39999962, "logps/chosen": -452.3999939, "logps/rejected": -550.79998779, "logits/chosen": -1.49062502, "logits/rejected": -1.51874995, "nll_loss": 0.40820312, "epoch": 1.71052632, "global_step/max_steps": "65/114", "percentage": "57.02%", "elapsed_time": "7m 13s", "remaining_time": "5m 26s"} +{"loss": 0.44562836, "grad_norm": 0.40382498, "learning_rate": 3.566e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.14889, "rewards/chosen": 17.13750076, "rewards/rejected": 0.24648437, "rewards/accuracies": 1.0, "rewards/margins": 16.89999962, "logps/chosen": -402.20001221, "logps/rejected": -646.40002441, "logits/chosen": -1.375, "logits/rejected": -1.53281248, "nll_loss": 0.44570312, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "7m 46s", "remaining_time": "4m 53s"} +{"eval_loss": 0.31274414, "eval_runtime": 2.2858, "eval_samples_per_second": 1.75, "eval_steps_per_second": 
0.437, "eval_rewards/chosen": 13.9375, "eval_rewards/rejected": 0.09960938, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.875, "eval_logps/chosen": -151.0, "eval_logps/rejected": -1112.0, "eval_logits/chosen": -0.71875, "eval_logits/rejected": -1.6484375, "eval_nll_loss": 0.3125, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "7m 48s", "remaining_time": "4m 54s"} +{"loss": 0.48649902, "grad_norm": 0.64577238, "learning_rate": 2.887e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148551, "rewards/chosen": 17.375, "rewards/rejected": 0.58320314, "rewards/accuracies": 1.0, "rewards/margins": 16.82500076, "logps/chosen": -511.20001221, "logps/rejected": -682.0, "logits/chosen": -1.37656248, "logits/rejected": -1.41250002, "nll_loss": 0.48710936, "epoch": 1.97368421, "global_step/max_steps": "75/114", "percentage": "65.79%", "elapsed_time": "8m 21s", "remaining_time": "4m 20s"} +{"loss": 0.46367188, "grad_norm": 0.55304127, "learning_rate": 2.252e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.147818, "rewards/chosen": 16.875, "rewards/rejected": -0.63339841, "rewards/accuracies": 1.0, "rewards/margins": 17.51250076, "logps/chosen": -432.3999939, "logps/rejected": -624.0, "logits/chosen": -1.40937495, "logits/rejected": -1.41718745, "nll_loss": 0.47734374, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "8m 57s", "remaining_time": "3m 48s"} +{"eval_loss": 0.30737305, "eval_runtime": 2.3551, "eval_samples_per_second": 1.698, "eval_steps_per_second": 0.425, "eval_rewards/chosen": 14.125, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.69921875, "eval_logits/rejected": -1.609375, "eval_nll_loss": 0.30664062, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "8m 59s", 
"remaining_time": "3m 49s"} +{"loss": 0.40716095, "grad_norm": 0.48444269, "learning_rate": 1.676e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.147889, "rewards/chosen": 17.16250038, "rewards/rejected": 1.38515627, "rewards/accuracies": 1.0, "rewards/margins": 15.80000019, "logps/chosen": -392.3999939, "logps/rejected": -744.79998779, "logits/chosen": -1.25625002, "logits/rejected": -1.53750002, "nll_loss": 0.40703124, "epoch": 2.23684211, "global_step/max_steps": "85/114", "percentage": "74.56%", "elapsed_time": "9m 31s", "remaining_time": "3m 14s"} +{"loss": 0.42355957, "grad_norm": 0.31612081, "learning_rate": 1.17e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.149275, "rewards/chosen": 18.54999924, "rewards/rejected": 1.6085937, "rewards/accuracies": 1.0, "rewards/margins": 16.88750076, "logps/chosen": -483.20001221, "logps/rejected": -590.0, "logits/chosen": -1.4296875, "logits/rejected": -1.23749995, "nll_loss": 0.42382812, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "9m 59s", "remaining_time": "2m 39s"} +{"eval_loss": 0.30639648, "eval_runtime": 2.3, "eval_samples_per_second": 1.739, "eval_steps_per_second": 0.435, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6953125, "eval_logits/rejected": -1.6015625, "eval_nll_loss": 0.30664062, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "10m 1s", "remaining_time": "2m 40s"} +{"loss": 0.46888428, "grad_norm": 0.24964157, "learning_rate": 7.44e-06, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148344, "rewards/chosen": 19.375, "rewards/rejected": 0.97011721, "rewards/accuracies": 1.0, "rewards/margins": 18.42499924, "logps/chosen": -524.40002441, "logps/rejected": -720.0, "logits/chosen": -1.36093748, "logits/rejected": -1.46718752, 
"nll_loss": 0.46875, "epoch": 2.5, "global_step/max_steps": "95/114", "percentage": "83.33%", "elapsed_time": "10m 36s", "remaining_time": "2m 7s"} +{"loss": 0.4006958, "grad_norm": 0.42005409, "learning_rate": 4.09e-06, "memory(GiB)": 56.86, "train_speed(iter/s)": 0.149184, "rewards/chosen": 16.79999924, "rewards/rejected": -0.42812499, "rewards/accuracies": 1.0, "rewards/margins": 17.22500038, "logps/chosen": -405.3999939, "logps/rejected": -564.40002441, "logits/chosen": -1.39218748, "logits/rejected": -1.546875, "nll_loss": 0.40078124, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "11m 6s", "remaining_time": "1m 33s"} +{"eval_loss": 0.30712891, "eval_runtime": 2.2948, "eval_samples_per_second": 1.743, "eval_steps_per_second": 0.436, "eval_rewards/chosen": 14.125, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.69140625, "eval_logits/rejected": -1.6015625, "eval_nll_loss": 0.30664062, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "11m 9s", "remaining_time": "1m 33s"} +{"loss": 0.41813049, "grad_norm": 0.57014055, "learning_rate": 1.7e-06, "memory(GiB)": 56.86, "train_speed(iter/s)": 0.148863, "rewards/chosen": 17.21249962, "rewards/rejected": 0.89570314, "rewards/accuracies": 1.0, "rewards/margins": 16.3125, "logps/chosen": -432.6000061, "logps/rejected": -567.20001221, "logits/chosen": -1.41250002, "logits/rejected": -1.515625, "nll_loss": 0.41835937, "epoch": 2.76315789, "global_step/max_steps": "105/114", "percentage": "92.11%", "elapsed_time": "11m 41s", "remaining_time": "1m 0s"} +{"loss": 0.39621582, "grad_norm": 0.5114481, "learning_rate": 3.4e-07, "memory(GiB)": 56.86, "train_speed(iter/s)": 0.149183, "rewards/chosen": 17.85000038, "rewards/rejected": 0.2234375, "rewards/accuracies": 1.0, "rewards/margins": 17.63750076, 
"logps/chosen": -452.3999939, "logps/rejected": -605.59997559, "logits/chosen": -1.34843755, "logits/rejected": -1.53437495, "nll_loss": 0.39648438, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "12m 13s", "remaining_time": "26s"} +{"eval_loss": 0.30761719, "eval_runtime": 2.323, "eval_samples_per_second": 1.722, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6875, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.30859375, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "12m 16s", "remaining_time": "26s"} +{"eval_loss": 0.30688477, "eval_runtime": 2.2759, "eval_samples_per_second": 1.758, "eval_steps_per_second": 0.439, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6875, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.30664062, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "12m 48s", "remaining_time": "0s"} +{"train_runtime": 770.2199, "train_samples_per_second": 1.161, "train_steps_per_second": 0.148, "total_flos": 359754023305216.0, "train_loss": 0.64872822, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "12m 50s", "remaining_time": "0s"} +{"train_dataset": "1698.815436±897.000106, min=182.000000, max=4081.000000, size=298", "val_dataset": "1637.250000±797.581461, min=755.000000, max=2485.000000, size=4", "model_parameter_info": "PeftModelForCausalLM: 14804.4401M Params (34.4064M Trainable [0.2324%]), 0.0001M Buffers.", "last_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-114", "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/checkpoint-90", "best_metric": 0.30639648, "global_step": 114, "log_history": [{"loss": 1.337890625, "grad_norm": 9.854079581094561, "learning_rate": 1.6666666666666667e-05, "memory(GiB)": 9.88, "train_speed(iter/s)": 0.089235, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -704.0, "logps/rejected": -416.0, "logits/chosen": -2.15625, "logits/rejected": -1.40625, "nll_loss": 0.6484375, "epoch": 0.02631578947368421, "step": 1}, {"loss": 2.100830078125, "grad_norm": 14.792355942314634, "learning_rate": 8.333333333333334e-05, "memory(GiB)": 19.62, "train_speed(iter/s)": 0.141028, "rewards/chosen": 0.03759765625, "rewards/rejected": 0.0250244140625, "rewards/accuracies": 0.1875, "rewards/margins": 0.01247406005859375, "logps/chosen": -721.0, "logps/rejected": -575.5, "logits/chosen": -1.595703125, "logits/rejected": -1.576171875, "nll_loss": 1.4130859375, "epoch": 0.13157894736842105, "step": 5}, {"loss": 2.03017578125, "grad_norm": 6.728344330769066, "learning_rate": 9.966191788709716e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.141858, "rewards/chosen": 1.0109374523162842, "rewards/rejected": 0.34417724609375, "rewards/accuracies": 0.75, "rewards/margins": 0.6689453125, "logps/chosen": -620.7999877929688, "logps/rejected": -703.2000122070312, "logits/chosen": -1.524999976158142, "logits/rejected": -1.6375000476837158, "nll_loss": 1.5515625476837158, "epoch": 0.2631578947368421, "step": 10}, {"eval_loss": 0.7890625, "eval_runtime": 2.2882, "eval_samples_per_second": 1.748, "eval_steps_per_second": 0.437, "eval_rewards/chosen": 3.4375, "eval_rewards/rejected": 1.3515625, "eval_rewards/accuracies": 1.0, 
"eval_rewards/margins": 2.09375, "eval_logps/chosen": -256.0, "eval_logps/rejected": -1096.0, "eval_logits/chosen": -0.9375, "eval_logits/rejected": -1.828125, "eval_nll_loss": 0.65625, "epoch": 0.2631578947368421, "step": 10}, {"loss": 1.047705078125, "grad_norm": 2.3440116641074753, "learning_rate": 9.829629131445342e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.146389, "rewards/chosen": 4.159375190734863, "rewards/rejected": 1.6046874523162842, "rewards/accuracies": 0.925000011920929, "rewards/margins": 2.5609374046325684, "logps/chosen": -641.2000122070312, "logps/rejected": -579.5999755859375, "logits/chosen": -1.717187523841858, "logits/rejected": -1.670312523841858, "nll_loss": 0.864062488079071, "epoch": 0.39473684210526316, "step": 15}, {"loss": 0.8362548828125, "grad_norm": 1.6177693098167876, "learning_rate": 9.591080534401371e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.148317, "rewards/chosen": 8.362500190734863, "rewards/rejected": 3.8843750953674316, "rewards/accuracies": 1.0, "rewards/margins": 4.474999904632568, "logps/chosen": -400.3999938964844, "logps/rejected": -564.4000244140625, "logits/chosen": -1.6515624523162842, "logits/rejected": -1.734375, "nll_loss": 0.8031250238418579, "epoch": 0.5263157894736842, "step": 20}, {"eval_loss": 0.388671875, "eval_runtime": 2.32, "eval_samples_per_second": 1.724, "eval_steps_per_second": 0.431, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 6.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 5.125, "eval_logps/chosen": -179.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -0.8828125, "eval_logits/rejected": -1.828125, "eval_nll_loss": 0.376953125, "epoch": 0.5263157894736842, "step": 20}, {"loss": 0.57784423828125, "grad_norm": 1.2977003336577986, "learning_rate": 9.255583362184999e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.146297, "rewards/chosen": 11.087499618530273, "rewards/rejected": 4.537499904632568, "rewards/accuracies": 1.0, "rewards/margins": 
6.550000190734863, "logps/chosen": -560.0, "logps/rejected": -661.5999755859375, "logits/chosen": -1.6531250476837158, "logits/rejected": -1.646875023841858, "nll_loss": 0.571093738079071, "epoch": 0.6578947368421053, "step": 25}, {"loss": 0.5139404296875, "grad_norm": 0.7551717618560747, "learning_rate": 8.83022221559489e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.147047, "rewards/chosen": 11.925000190734863, "rewards/rejected": 3.2593750953674316, "rewards/accuracies": 1.0, "rewards/margins": 8.675000190734863, "logps/chosen": -405.20001220703125, "logps/rejected": -594.0, "logits/chosen": -1.545312523841858, "logits/rejected": -1.78125, "nll_loss": 0.513671875, "epoch": 0.7894736842105263, "step": 30}, {"eval_loss": 0.347900390625, "eval_runtime": 2.3228, "eval_samples_per_second": 1.722, "eval_steps_per_second": 0.431, "eval_rewards/chosen": 12.3125, "eval_rewards/rejected": 2.75, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.5625, "eval_logps/chosen": -168.0, "eval_logps/rejected": -1088.0, "eval_logits/chosen": -1.015625, "eval_logits/rejected": -1.8203125, "eval_nll_loss": 0.34765625, "epoch": 0.7894736842105263, "step": 30}, {"loss": 0.56610107421875, "grad_norm": 0.8190181064971851, "learning_rate": 8.323979328069689e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.145437, "rewards/chosen": 13.862500190734863, "rewards/rejected": 3.7906250953674316, "rewards/accuracies": 1.0, "rewards/margins": 10.087499618530273, "logps/chosen": -620.4000244140625, "logps/rejected": -643.5999755859375, "logits/chosen": -1.7468750476837158, "logits/rejected": -1.735937476158142, "nll_loss": 0.565625011920929, "epoch": 0.9210526315789473, "step": 35}, {"loss": 0.530419921875, "grad_norm": 0.7484729451276256, "learning_rate": 7.74754489035403e-05, "memory(GiB)": 46.15, "train_speed(iter/s)": 0.146795, "rewards/chosen": 14.425000190734863, "rewards/rejected": 2.8515625, "rewards/accuracies": 1.0, "rewards/margins": 11.5625, "logps/chosen": 
-469.6000061035156, "logps/rejected": -676.0, "logits/chosen": -1.6843750476837158, "logits/rejected": -1.7156250476837158, "nll_loss": 0.582812488079071, "epoch": 1.0526315789473684, "step": 40}, {"eval_loss": 0.325927734375, "eval_runtime": 2.3449, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.426, "eval_rewards/chosen": 13.375, "eval_rewards/rejected": 0.30078125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.0625, "eval_logps/chosen": -157.0, "eval_logps/rejected": -1112.0, "eval_logits/chosen": -1.0078125, "eval_logits/rejected": -1.75, "eval_nll_loss": 0.326171875, "epoch": 1.0526315789473684, "step": 40}, {"loss": 0.444189453125, "grad_norm": 0.513917830071538, "learning_rate": 7.113091308703498e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.145239, "rewards/chosen": 15.300000190734863, "rewards/rejected": 0.00937500037252903, "rewards/accuracies": 1.0, "rewards/margins": 15.300000190734863, "logps/chosen": -441.6000061035156, "logps/rejected": -712.4000244140625, "logits/chosen": -1.579687476158142, "logits/rejected": -1.610937476158142, "nll_loss": 0.4437499940395355, "epoch": 1.1842105263157894, "step": 45}, {"loss": 0.500921630859375, "grad_norm": 0.4704081803690242, "learning_rate": 6.434016163555452e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.146472, "rewards/chosen": 16.225000381469727, "rewards/rejected": 0.04252929612994194, "rewards/accuracies": 1.0, "rewards/margins": 16.200000762939453, "logps/chosen": -492.3999938964844, "logps/rejected": -671.2000122070312, "logits/chosen": -1.5890624523162842, "logits/rejected": -1.7140624523162842, "nll_loss": 0.501171886920929, "epoch": 1.3157894736842106, "step": 50}, {"eval_loss": 0.313720703125, "eval_runtime": 2.283, "eval_samples_per_second": 1.752, "eval_steps_per_second": 0.438, "eval_rewards/chosen": 13.9375, "eval_rewards/rejected": -0.69921875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.625, "eval_logps/chosen": -152.0, "eval_logps/rejected": 
-1120.0, "eval_logits/chosen": -1.0, "eval_logits/rejected": -1.65625, "eval_nll_loss": 0.314453125, "epoch": 1.3157894736842106, "step": 50}, {"loss": 0.4777099609375, "grad_norm": 0.31799240114671307, "learning_rate": 5.724659296536233e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.147538, "rewards/chosen": 16.662500381469727, "rewards/rejected": 0.690625011920929, "rewards/accuracies": 1.0, "rewards/margins": 15.987500190734863, "logps/chosen": -471.20001220703125, "logps/rejected": -620.7999877929688, "logits/chosen": -1.443750023841858, "logits/rejected": -1.5265624523162842, "nll_loss": 0.478515625, "epoch": 1.4473684210526316, "step": 55}, {"loss": 0.43349609375, "grad_norm": 0.20661664017833653, "learning_rate": 5e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148871, "rewards/chosen": 17.049999237060547, "rewards/rejected": 0.6507812738418579, "rewards/accuracies": 1.0, "rewards/margins": 16.424999237060547, "logps/chosen": -514.7999877929688, "logps/rejected": -496.0, "logits/chosen": -1.587499976158142, "logits/rejected": -1.357812523841858, "nll_loss": 0.43359375, "epoch": 1.5789473684210527, "step": 60}, {"eval_loss": 0.31396484375, "eval_runtime": 2.3335, "eval_samples_per_second": 1.714, "eval_steps_per_second": 0.429, "eval_rewards/chosen": 13.875, "eval_rewards/rejected": -1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.875, "eval_logps/chosen": -152.0, "eval_logps/rejected": -1120.0, "eval_logits/chosen": -0.78125, "eval_logits/rejected": -1.6484375, "eval_nll_loss": 0.314453125, "epoch": 1.5789473684210527, "step": 60}, {"loss": 0.4079315185546875, "grad_norm": 0.5101990627724774, "learning_rate": 4.275340703463767e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148763, "rewards/chosen": 16.987499237060547, "rewards/rejected": -0.40937501192092896, "rewards/accuracies": 1.0, "rewards/margins": 17.399999618530273, "logps/chosen": -452.3999938964844, "logps/rejected": -550.7999877929688, "logits/chosen": 
-1.490625023841858, "logits/rejected": -1.5187499523162842, "nll_loss": 0.408203125, "epoch": 1.7105263157894737, "step": 65}, {"loss": 0.44562835693359376, "grad_norm": 0.40382497884369994, "learning_rate": 3.5659838364445505e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.14889, "rewards/chosen": 17.137500762939453, "rewards/rejected": 0.24648436903953552, "rewards/accuracies": 1.0, "rewards/margins": 16.899999618530273, "logps/chosen": -402.20001220703125, "logps/rejected": -646.4000244140625, "logits/chosen": -1.375, "logits/rejected": -1.532812476158142, "nll_loss": 0.4457031190395355, "epoch": 1.8421052631578947, "step": 70}, {"eval_loss": 0.312744140625, "eval_runtime": 2.2858, "eval_samples_per_second": 1.75, "eval_steps_per_second": 0.437, "eval_rewards/chosen": 13.9375, "eval_rewards/rejected": 0.099609375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.875, "eval_logps/chosen": -151.0, "eval_logps/rejected": -1112.0, "eval_logits/chosen": -0.71875, "eval_logits/rejected": -1.6484375, "eval_nll_loss": 0.3125, "epoch": 1.8421052631578947, "step": 70}, {"loss": 0.4864990234375, "grad_norm": 0.6457723772811964, "learning_rate": 2.886908691296504e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148551, "rewards/chosen": 17.375, "rewards/rejected": 0.583203136920929, "rewards/accuracies": 1.0, "rewards/margins": 16.825000762939453, "logps/chosen": -511.20001220703125, "logps/rejected": -682.0, "logits/chosen": -1.376562476158142, "logits/rejected": -1.412500023841858, "nll_loss": 0.48710936307907104, "epoch": 1.973684210526316, "step": 75}, {"loss": 0.463671875, "grad_norm": 0.5530412658330373, "learning_rate": 2.25245510964597e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.147818, "rewards/chosen": 16.875, "rewards/rejected": -0.6333984136581421, "rewards/accuracies": 1.0, "rewards/margins": 17.512500762939453, "logps/chosen": -432.3999938964844, "logps/rejected": -624.0, "logits/chosen": -1.4093749523162842, "logits/rejected": 
-1.4171874523162842, "nll_loss": 0.47734373807907104, "epoch": 2.1052631578947367, "step": 80}, {"eval_loss": 0.307373046875, "eval_runtime": 2.3551, "eval_samples_per_second": 1.698, "eval_steps_per_second": 0.425, "eval_rewards/chosen": 14.125, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.69921875, "eval_logits/rejected": -1.609375, "eval_nll_loss": 0.306640625, "epoch": 2.1052631578947367, "step": 80}, {"loss": 0.40716094970703126, "grad_norm": 0.4844426897846391, "learning_rate": 1.6760206719303105e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.147889, "rewards/chosen": 17.162500381469727, "rewards/rejected": 1.385156273841858, "rewards/accuracies": 1.0, "rewards/margins": 15.800000190734863, "logps/chosen": -392.3999938964844, "logps/rejected": -744.7999877929688, "logits/chosen": -1.256250023841858, "logits/rejected": -1.537500023841858, "nll_loss": 0.40703123807907104, "epoch": 2.236842105263158, "step": 85}, {"loss": 0.4235595703125, "grad_norm": 0.31612081336180875, "learning_rate": 1.1697777844051105e-05, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.149275, "rewards/chosen": 18.549999237060547, "rewards/rejected": 1.6085937023162842, "rewards/accuracies": 1.0, "rewards/margins": 16.887500762939453, "logps/chosen": -483.20001220703125, "logps/rejected": -590.0, "logits/chosen": -1.4296875, "logits/rejected": -1.2374999523162842, "nll_loss": 0.423828125, "epoch": 2.3684210526315788, "step": 90}, {"eval_loss": 0.306396484375, "eval_runtime": 2.3, "eval_samples_per_second": 1.739, "eval_steps_per_second": 0.435, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6953125, "eval_logits/rejected": -1.6015625, "eval_nll_loss": 0.306640625, "epoch": 
2.3684210526315788, "step": 90}, {"loss": 0.46888427734375, "grad_norm": 0.24964156861715608, "learning_rate": 7.444166378150013e-06, "memory(GiB)": 47.61, "train_speed(iter/s)": 0.148344, "rewards/chosen": 19.375, "rewards/rejected": 0.9701172113418579, "rewards/accuracies": 1.0, "rewards/margins": 18.424999237060547, "logps/chosen": -524.4000244140625, "logps/rejected": -720.0, "logits/chosen": -1.360937476158142, "logits/rejected": -1.467187523841858, "nll_loss": 0.46875, "epoch": 2.5, "step": 95}, {"loss": 0.40069580078125, "grad_norm": 0.42005408649300036, "learning_rate": 4.089194655986306e-06, "memory(GiB)": 56.86, "train_speed(iter/s)": 0.149184, "rewards/chosen": 16.799999237060547, "rewards/rejected": -0.4281249940395355, "rewards/accuracies": 1.0, "rewards/margins": 17.225000381469727, "logps/chosen": -405.3999938964844, "logps/rejected": -564.4000244140625, "logits/chosen": -1.392187476158142, "logits/rejected": -1.546875, "nll_loss": 0.4007812440395355, "epoch": 2.6315789473684212, "step": 100}, {"eval_loss": 0.30712890625, "eval_runtime": 2.2948, "eval_samples_per_second": 1.743, "eval_steps_per_second": 0.436, "eval_rewards/chosen": 14.125, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.69140625, "eval_logits/rejected": -1.6015625, "eval_nll_loss": 0.306640625, "epoch": 2.6315789473684212, "step": 100}, {"loss": 0.4181304931640625, "grad_norm": 0.5701405477605673, "learning_rate": 1.70370868554659e-06, "memory(GiB)": 56.86, "train_speed(iter/s)": 0.148863, "rewards/chosen": 17.212499618530273, "rewards/rejected": 0.895703136920929, "rewards/accuracies": 1.0, "rewards/margins": 16.3125, "logps/chosen": -432.6000061035156, "logps/rejected": -567.2000122070312, "logits/chosen": -1.412500023841858, "logits/rejected": -1.515625, "nll_loss": 0.4183593690395355, "epoch": 2.763157894736842, "step": 105}, {"loss": 
0.3962158203125, "grad_norm": 0.5114480987922484, "learning_rate": 3.380821129028489e-07, "memory(GiB)": 56.86, "train_speed(iter/s)": 0.149183, "rewards/chosen": 17.850000381469727, "rewards/rejected": 0.22343750298023224, "rewards/accuracies": 1.0, "rewards/margins": 17.637500762939453, "logps/chosen": -452.3999938964844, "logps/rejected": -605.5999755859375, "logits/chosen": -1.3484375476837158, "logits/rejected": -1.5343749523162842, "nll_loss": 0.396484375, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.3076171875, "eval_runtime": 2.323, "eval_samples_per_second": 1.722, "eval_steps_per_second": 0.43, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6875, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.30859375, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.306884765625, "eval_runtime": 2.2759, "eval_samples_per_second": 1.758, "eval_steps_per_second": 0.439, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1104.0, "eval_logits/chosen": -0.6875, "eval_logits/rejected": -1.59375, "eval_nll_loss": 0.306640625, "epoch": 3.0, "step": 114}, {"train_runtime": 770.2199, "train_samples_per_second": 1.161, "train_steps_per_second": 0.148, "total_flos": 359754023305216.0, "train_loss": 0.6487282200863487, "epoch": 3.0, "step": 114}], "memory": 56.85546875} diff --git a/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs/events.out.tfevents.1739622241.kml-task-540432-record-10144729-prod-worker-0.6619.0 b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs/events.out.tfevents.1739622241.kml-task-540432-record-10144729-prod-worker-0.6619.0 new file mode 
100644 index 0000000000000000000000000000000000000000..93857289a58e3a43b9df6c1cf6aff3e54ea3823d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-14b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-122119/runs/events.out.tfevents.1739622241.kml-task-540432-record-10144729-prod-worker-0.6619.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84366f82787da8e2f46b300532abd0dded32ef091e8e1e667b8c4726b098539b +size 36869 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/args.json new file mode 100644 index 0000000000000000000000000000000000000000..050fd8222794164e398a2b20e0ba5c0f3e7fcbc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + 
"quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": 
"steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": 
"auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + 
], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": 
"token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', 
hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + "training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, 
use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, 
group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', 
sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..913db4c1f4c80fd71c1f6a73bf55b94a07b79467 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "q_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95afdedae31e7a7e3ea23d568b204b6525156e9e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b1dc8df17ef4c56a43a2e154e3191c82ff80c0dff06bdffa77f4335df33928 +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/args.json new file mode 100644 
index 0000000000000000000000000000000000000000..050fd8222794164e398a2b20e0ba5c0f3e7fcbc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": 
[], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0ebda81a9cb966dc737fb7dd4a851f26be5ae4ef --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7bda8899f21a5009e9928dd545672342a70b48de99a32994cbbfb448de72c35 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42b72ee67abf83206b3fefbf904195073e9e13e3 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80f018b1097258f647a793faded572a550d0c60fb9d5675809abf1da80f56f1 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e995d4441b026b57740cc20d290317eef8d60c3b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a6b05d306c53510ab7b5a7d6d0e9fd763c9d83eb46ec0feb8171246a63d693d8 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cb74c045c50834d6279cc1aac6b6f6d00417ff6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92bacec2308c8898a72a4715b8248cd9022c3efaf00c3d87592458f9fd925a48 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..abcd7516b1b792981faa80be001f44315f627875 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8622492e5910b7db69c2cc2beb2ed5ed3d19749e5501fab5d9db889cda580dba +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..228fee893b7a4bd250d29e1d2cd2be0c07cfb9d9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c224d7c3a4f8140466bf5ea76dafbf250e46daf843f8cf0b991e1a98d0c55286 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ea5e0a336cb38cbc377bd1050c7d0ffe78bea6b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26036b1a902a7206211e00ebf2e1211591d2edebe30a3fd3746c8177e763a3d2 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d47b8f62c5249a95f4be6c9543401636a88b5dcf --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6575cc60c3f76618c64f133cf231a6512128612c498d94bc3a0f61f7bf9a5d +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c7ae4432113ad4b11cf31e67d4c4a37f9f89937 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddfc67b3a517ef961c3b5705cc7acf2914eb49d3c4dc2992884fa09449846b3e +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4be062e0eceb3e9ac20c1d41d4b0cf14db42ac9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4df242d6635400e57556b99edee63cfff3a968ca0a2755b2ceef1f9012b05e +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b337383b9f00724e70df2259eb88dce09a19e1c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00564d9858ab14b7a140837c454f065a2420dc403617f17cb8a2e5118cd86eaa +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b9ba5870d8bb67867bda6c28f98a0ed4dbc9ca0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329d6ca2ce6026f0136c01384891858b0084fc96c796d6096100f085374bb838 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..92251b57cb07b3bbffb2cf4e89796f689851c1bb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee250f073179786701924268a7356463dcb5ab4bb216a0319d44c81b37259f3 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b940ead0174e2c9a9d429788757291a89c76828b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642dc60def74ca84e35dfdf6296aa8289a65944e65a892aff15bf6e6547a71e0 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ad47e5822c97d26058f7cd02f16ce9d56ce270f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e67b4b0b03435aab670516d7b779bd2586af114df30a200bc2fd9fa37b993aec +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..508375eb6c9c97918bb0d80d1c9da8d7c15173c1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbfc7e2779ddf3c8f005358467f333da4c7d239138fd358ac7f6095abfcd63d1 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/latest new file mode 100644 index 0000000000000000000000000000000000000000..744ae7dbad571b6f37ec6c7066549494261bb59e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/latest @@ -0,0 +1 @@ +global_step100 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..97f51b498d48145bd9cc14b35f8236b9ec95a4f7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08e59ac81067b262a084604cd3392250166c2841 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..20a24c17b4be2ee59cd5e6682010519318a91e58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..54050f6cf8fb847e2a926e14a7aad2647761521a --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..263aae475c49b090bce43f143308192c5bf9a95b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..942ed5d60ae87dce686b33da76a34db404036dc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..57789be3df3983cb8acc1500bf6470ffadb1c578 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32d6e2e7eb7148713b473b0c821a98e616ab6e6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a1fb08c48e9d34df783eb19e7c9d1caf0ed386 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec37c3a15b8d061312402391f2fddb52d623a1416d6d2879a30f184450d844f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..48930e4b516cfbaddfd1b7c7fd768dc137c9baaa --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/trainer_state.json @@ -0,0 +1,581 @@ +{ + "best_metric": 0.27856445, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-70", + "epoch": 2.6315789473684212, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.7678369137832615, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.061081 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.77482904203973, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.63671875, + "logps/chosen": -676.25, + "logps/rejected": -514.0, + "loss": 1.84375, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.0674591064453125, + "rewards/margins": 0.08127593994140625, + "rewards/rejected": -0.01410675048828125, + "step": 5, + "train_speed(iter/s)": 0.081107 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.776080083637143, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.506250023841858, + "logits/rejected": -1.532812476158142, + "logps/chosen": 
-573.2000122070312, + "logps/rejected": -644.0, + "loss": 1.8857421875, + "memory(GiB)": 56.21, + "nll_loss": 1.321874976158142, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.605664074420929, + "rewards/margins": 0.3480468690395355, + "rewards/rejected": 0.25761717557907104, + "step": 10, + "train_speed(iter/s)": 0.078746 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.7958984375, + "eval_nll_loss": 0.55859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.703125, + "eval_rewards/margins": 1.3515625, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.4381, + "eval_samples_per_second": 0.901, + "eval_steps_per_second": 0.225, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.4823344294382115, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.6171875, + "logps/chosen": -602.0, + "logps/rejected": -516.7999877929688, + "loss": 1.065966796875, + "memory(GiB)": 56.21, + "nll_loss": 0.7718750238418579, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.328125, + "rewards/margins": 1.8703124523162842, + "rewards/rejected": 1.462499976158142, + "step": 15, + "train_speed(iter/s)": 0.080005 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7039423711093902, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6484375, + "logits/rejected": -1.6218750476837158, + "logps/chosen": -375.20001220703125, + "logps/rejected": -528.0, + "loss": 0.796923828125, + "memory(GiB)": 56.21, + "nll_loss": 0.6761718988418579, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.800000190734863, + "rewards/margins": 3.1968750953674316, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.08091 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + 
"eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3564453125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.125, + "eval_rewards/margins": 4.21875, + "eval_rewards/rejected": 4.90625, + "eval_runtime": 4.4028, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.076790008093808, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.654687523841858, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -546.0, + "logps/rejected": -647.0, + "loss": 0.5287841796875, + "memory(GiB)": 56.21, + "nll_loss": 0.518359363079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.699999809265137, + "rewards/margins": 6.859375, + "rewards/rejected": 0.8388671875, + "step": 25, + "train_speed(iter/s)": 0.079793 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.797995818648193, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.654687523841858, + "logps/chosen": -400.79998779296875, + "logps/rejected": -590.7999877929688, + "loss": 0.4728271484375, + "memory(GiB)": 56.21, + "nll_loss": 0.4722656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5, + "rewards/margins": 9.725000381469727, + "rewards/rejected": -1.237695336341858, + "step": 30, + "train_speed(iter/s)": 0.080036 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.71875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -140.0, + "eval_logps/rejected": -1080.0, + "eval_loss": 0.302978515625, + "eval_nll_loss": 0.302734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.0, + "eval_rewards/margins": 10.25, + "eval_rewards/rejected": -0.30078125, + "eval_runtime": 4.4097, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 
0.4446633571773274, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.71875, + "logits/rejected": -1.765625, + "logps/chosen": -614.0, + "logps/rejected": -618.4000244140625, + "loss": 0.537164306640625, + "memory(GiB)": 56.21, + "nll_loss": 0.53515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.137499809265137, + "rewards/margins": 9.306249618530273, + "rewards/rejected": -0.17939452826976776, + "step": 35, + "train_speed(iter/s)": 0.079077 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7264397691155322, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7390625476837158, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.5081787109375, + "memory(GiB)": 56.21, + "nll_loss": 0.563671886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.137499809265137, + "rewards/margins": 10.050000190734863, + "rewards/rejected": 0.096435546875, + "step": 40, + "train_speed(iter/s)": 0.079662 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7421875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -136.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.2939453125, + "eval_nll_loss": 0.29296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.375, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.1015625, + "eval_runtime": 4.4344, + "eval_samples_per_second": 0.902, + "eval_steps_per_second": 0.226, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.4761790027876643, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.610937476158142, + "logps/chosen": -440.6000061035156, + "logps/rejected": -664.7999877929688, + "loss": 0.427099609375, + "memory(GiB)": 57.66, + "nll_loss": 0.42656248807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.162500381469727, + "rewards/margins": 11.362500190734863, + "rewards/rejected": 
-0.18632812798023224, + "step": 45, + "train_speed(iter/s)": 0.078826 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.43416852815625473, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.78125, + "logits/rejected": -1.8156249523162842, + "logps/chosen": -489.20001220703125, + "logps/rejected": -612.7999877929688, + "loss": 0.472528076171875, + "memory(GiB)": 57.66, + "nll_loss": 0.47265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.875, + "rewards/margins": 11.850000381469727, + "rewards/rejected": 0.03691406175494194, + "step": 50, + "train_speed(iter/s)": 0.079352 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.8046875, + "eval_logits/rejected": -1.3125, + "eval_logps/chosen": -132.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.2861328125, + "eval_nll_loss": 0.28515625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.75, + "eval_rewards/margins": 8.9375, + "eval_rewards/rejected": 1.8515625, + "eval_runtime": 4.3892, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.159354970669322, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.7265625, + "logps/chosen": -466.3999938964844, + "logps/rejected": -571.2000122070312, + "loss": 0.454266357421875, + "memory(GiB)": 57.66, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.75, + "rewards/margins": 12.324999809265137, + "rewards/rejected": 0.4175781309604645, + "step": 55, + "train_speed(iter/s)": 0.079883 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13916645187817775, + "learning_rate": 5e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6640625, + "logps/chosen": -506.3999938964844, + "logps/rejected": -442.0, + "loss": 0.41783447265625, + "memory(GiB)": 57.66, + "nll_loss": 0.41796875, + "rewards/accuracies": 1.0, + "rewards/chosen": 
13.387499809265137, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.296875, + "step": 60, + "train_speed(iter/s)": 0.080545 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.28076171875, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.875, + "eval_rewards/rejected": 2.15625, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.24276457284222902, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.740625023841858, + "logits/rejected": -1.671875, + "logps/chosen": -445.6000061035156, + "logps/rejected": -494.3999938964844, + "loss": 0.3975799560546875, + "memory(GiB)": 57.66, + "nll_loss": 0.39726561307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.153076171875, + "step": 65, + "train_speed(iter/s)": 0.080487 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2570014131550182, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.696874976158142, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.20001220703125, + "logps/rejected": -592.4000244140625, + "loss": 0.4327178955078125, + "memory(GiB)": 57.66, + "nll_loss": 0.4320312440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.399999618530273, + "rewards/margins": 13.337499618530273, + "rewards/rejected": 0.05312500149011612, + "step": 70, + "train_speed(iter/s)": 0.080636 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.278564453125, + "eval_nll_loss": 0.279296875, + 
"eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 9.25, + "eval_rewards/rejected": 1.75, + "eval_runtime": 4.4055, + "eval_samples_per_second": 0.908, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.42633298601984926, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.609375, + "logps/chosen": -497.6000061035156, + "logps/rejected": -637.2000122070312, + "loss": 0.463232421875, + "memory(GiB)": 57.66, + "nll_loss": 0.46367186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.300000190734863, + "rewards/margins": 13.5625, + "rewards/rejected": -0.259765625, + "step": 75, + "train_speed(iter/s)": 0.08049 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3488843908324041, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.7218749523162842, + "logits/rejected": -1.642187476158142, + "logps/chosen": -427.20001220703125, + "logps/rejected": -584.7999877929688, + "loss": 0.45631103515625, + "memory(GiB)": 57.66, + "nll_loss": 0.4730468690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 13.762499809265137, + "rewards/rejected": -0.6175781488418579, + "step": 80, + "train_speed(iter/s)": 0.080085 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.27880859375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 9.8125, + "eval_rewards/rejected": 1.25, + "eval_runtime": 4.4925, + "eval_samples_per_second": 0.89, + "eval_steps_per_second": 0.223, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.37174945064044734, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.642187476158142, + "logits/rejected": 
-1.634374976158142, + "logps/chosen": -380.79998779296875, + "logps/rejected": -698.0, + "loss": 0.3918304443359375, + "memory(GiB)": 57.66, + "nll_loss": 0.3910156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.0625, + "rewards/margins": 12.800000190734863, + "rewards/rejected": 0.25874024629592896, + "step": 85, + "train_speed(iter/s)": 0.08009 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21082412866946396, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.623437523841858, + "logits/rejected": -1.631250023841858, + "logps/chosen": -481.6000061035156, + "logps/rejected": -547.5999755859375, + "loss": 0.42437286376953126, + "memory(GiB)": 57.66, + "nll_loss": 0.4242187440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.662500381469727, + "rewards/margins": 14.637499809265137, + "rewards/rejected": 0.0283203125, + "step": 90, + "train_speed(iter/s)": 0.08076 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 9.9375, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3984, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.14903932297946823, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.6593749523162842, + "logps/chosen": -511.6000061035156, + "logps/rejected": -677.2000122070312, + "loss": 0.4473388671875, + "memory(GiB)": 57.66, + "nll_loss": 0.447265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.737500190734863, + "rewards/margins": 15.587499618530273, + "rewards/rejected": -0.856249988079071, + "step": 95, + "train_speed(iter/s)": 0.080249 + }, + { + "epoch": 2.6315789473684212, + 
"grad_norm": 0.2676971755335469, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.639062523841858, + "logps/chosen": -397.0, + "logps/rejected": -521.5999755859375, + "loss": 0.393194580078125, + "memory(GiB)": 66.91, + "nll_loss": 0.39335936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 14.0625, + "rewards/rejected": -0.9228515625, + "step": 100, + "train_speed(iter/s)": 0.080703 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 9.9375, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.4148, + "eval_samples_per_second": 0.906, + "eval_steps_per_second": 0.227, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 421598153670656.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d780db9cce62c6afec5a42fb7061df032125452d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:c680252e14d1d2e99b069fe72abd40b3680b9a4c5bfe6e86b72afd7192436af0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-100/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + 
+ + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..913db4c1f4c80fd71c1f6a73bf55b94a07b79467 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "q_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d408327aec71e1384971dba4c6b6158129b9df8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7716361f2d39e4576c9242e88435232f76b27c1ed223214acff77953903f680d +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/args.json new file mode 100644 
index 0000000000000000000000000000000000000000..050fd8222794164e398a2b20e0ba5c0f3e7fcbc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": 
[], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e2eba8705d8e6e6633b34e17e96bab51e2e78ec7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0aee51384d3317e077acad40b6ab8153e404f80c17149443b452ba32cd309d6 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..81b8506e8ece0cdd685fc90932e2d31beca3d962 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cdea64f86b36a99edc9478eaf5be052e01230cd960d67ccdb821222bd693d1d +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e1c806841d026661d47ab6b4e76e17d93d9d2b2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4739a1878fb09aca7ce1456ddeffc6809e3f8b34f413add81d443cf971d67de5 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e93c3308910f15937ec7d34300fb1e3824b78659 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c69c25d435a988a8ba1ec99305745f3bceebccddabbee66dda120dbf89f6568 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..99234f19bbe61b35e3cab3f89ba6220f17aef3da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653b6e8e38d9d1efbe96e92f16e91fbf8f11b01523b60b878d6244a0b2d7553c +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2755a12b3826ccea777f57bd028031dda2c9ffe1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01cce4c67d24c770f7828325064309b07ae4c8b2b9b4c9b401f8c124ee3e037 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f48f4ba5127aada4a189eeceba052d68ec11266 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1fd9bd204603ce9d2065010c22b5f4942f5d39d907b6c566097bbd3f1d68743 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e753364b5f334740b31652014a0148b9b9129b92 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d942ae7d2c0965a6a2d20b6c8b35a7ea5923491241193ce93fd01e9f3ea324 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..86889b48637c1519700dcf392c5fd4250c396276 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7011aa79e9f3e47f5797e1231a7fceb11e3af7590251ae3883ad9e4abfdb2181 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d73b06a1459664f42de4ff2681f86a5050908335 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fed471a63452cc896eb8a12991a31cc24a71ca531f75e6e00532b17402da52 +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97dce84c82cccb211f43125e88fc249a2e10eef9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a28e6722f8e1e9486e98ad2f4d265dedb149dcd5b03e8e36b27b87b614ab65 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa9fa451d8acd14713d9b35ae023bab84f6b3dd9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed22d973ae09d40e9ac5284fda7feeff1b1fea69e729bb2874f8c54f3af8a23 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..909d392b69bc3711f01dd3727ef923fb6608d1be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76d28c1d0c5e9c7c0592fe6da9c505fc143cb23c4f5b635cb7fa969ae793eec +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a152dafa7e8732de84860fddffdfcfa43b56b4a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a53510eb271a5b9d986660a3310fb54fe0e1d414524dfd3a102a800b573088 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3aa888112024f48d86a6928a0293d77a4b044811 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7ebd528e13be2e27395632c38ff3b4d851cde3c253a572646b7294c5299a1f4d +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae76bf3d1aa7b2c4c127fecbef75b7c0ae52c87e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1387441b883b4a131fc274f85a4162f8484343f74c089ce6dac8df096333add7 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/latest new file mode 100644 index 0000000000000000000000000000000000000000..a9a22a69382a7711ca9e8ab6945c6d2cc8984927 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/latest @@ -0,0 +1 @@ +global_step110 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..584f4a4a43f100f35696d7314a633631af587f25 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_0.pth @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..05b027a867e5e9cebd446293ecff82cfb240cc76 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..af98f0dfe2a5d89fbccf90df58246a0b078c7016 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..715aa4a4ee3915f810fc2bacb2153eb8a0913781 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7bde70899833455b6ee4a99aff9388abc5ffe92 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..90cdeaa2fe438098e9d95ddbc06c765e51af1e78 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..2bd30529614c5be239cd9477af6bef0e313740b6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bed311094effd49cc2c89237c675f56eade157d1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38b1a376e7c81e3c533cf8a69ddf4eefa9d1336c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0767a9fe84680a5a8a76633a443cb301092115c026c1f5f7f1fbdc53dd7f856f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f8194c8a12d4f86621a5a0b59af5cbf15d114aef --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.27856445, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-70", + "epoch": 2.8947368421052633, + "eval_steps": 10, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.7678369137832615, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.061081 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.77482904203973, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.63671875, + "logps/chosen": -676.25, + "logps/rejected": -514.0, + "loss": 1.84375, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.0674591064453125, + "rewards/margins": 0.08127593994140625, + "rewards/rejected": -0.01410675048828125, + "step": 5, + "train_speed(iter/s)": 0.081107 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.776080083637143, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.506250023841858, + "logits/rejected": -1.532812476158142, + "logps/chosen": 
-573.2000122070312, + "logps/rejected": -644.0, + "loss": 1.8857421875, + "memory(GiB)": 56.21, + "nll_loss": 1.321874976158142, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.605664074420929, + "rewards/margins": 0.3480468690395355, + "rewards/rejected": 0.25761717557907104, + "step": 10, + "train_speed(iter/s)": 0.078746 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.7958984375, + "eval_nll_loss": 0.55859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.703125, + "eval_rewards/margins": 1.3515625, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.4381, + "eval_samples_per_second": 0.901, + "eval_steps_per_second": 0.225, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.4823344294382115, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.6171875, + "logps/chosen": -602.0, + "logps/rejected": -516.7999877929688, + "loss": 1.065966796875, + "memory(GiB)": 56.21, + "nll_loss": 0.7718750238418579, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.328125, + "rewards/margins": 1.8703124523162842, + "rewards/rejected": 1.462499976158142, + "step": 15, + "train_speed(iter/s)": 0.080005 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7039423711093902, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6484375, + "logits/rejected": -1.6218750476837158, + "logps/chosen": -375.20001220703125, + "logps/rejected": -528.0, + "loss": 0.796923828125, + "memory(GiB)": 56.21, + "nll_loss": 0.6761718988418579, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.800000190734863, + "rewards/margins": 3.1968750953674316, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.08091 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + 
"eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3564453125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.125, + "eval_rewards/margins": 4.21875, + "eval_rewards/rejected": 4.90625, + "eval_runtime": 4.4028, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.076790008093808, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.654687523841858, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -546.0, + "logps/rejected": -647.0, + "loss": 0.5287841796875, + "memory(GiB)": 56.21, + "nll_loss": 0.518359363079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.699999809265137, + "rewards/margins": 6.859375, + "rewards/rejected": 0.8388671875, + "step": 25, + "train_speed(iter/s)": 0.079793 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.797995818648193, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.654687523841858, + "logps/chosen": -400.79998779296875, + "logps/rejected": -590.7999877929688, + "loss": 0.4728271484375, + "memory(GiB)": 56.21, + "nll_loss": 0.4722656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5, + "rewards/margins": 9.725000381469727, + "rewards/rejected": -1.237695336341858, + "step": 30, + "train_speed(iter/s)": 0.080036 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.71875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -140.0, + "eval_logps/rejected": -1080.0, + "eval_loss": 0.302978515625, + "eval_nll_loss": 0.302734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.0, + "eval_rewards/margins": 10.25, + "eval_rewards/rejected": -0.30078125, + "eval_runtime": 4.4097, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 
0.4446633571773274, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.71875, + "logits/rejected": -1.765625, + "logps/chosen": -614.0, + "logps/rejected": -618.4000244140625, + "loss": 0.537164306640625, + "memory(GiB)": 56.21, + "nll_loss": 0.53515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.137499809265137, + "rewards/margins": 9.306249618530273, + "rewards/rejected": -0.17939452826976776, + "step": 35, + "train_speed(iter/s)": 0.079077 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7264397691155322, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7390625476837158, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.5081787109375, + "memory(GiB)": 56.21, + "nll_loss": 0.563671886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.137499809265137, + "rewards/margins": 10.050000190734863, + "rewards/rejected": 0.096435546875, + "step": 40, + "train_speed(iter/s)": 0.079662 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7421875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -136.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.2939453125, + "eval_nll_loss": 0.29296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.375, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.1015625, + "eval_runtime": 4.4344, + "eval_samples_per_second": 0.902, + "eval_steps_per_second": 0.226, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.4761790027876643, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.610937476158142, + "logps/chosen": -440.6000061035156, + "logps/rejected": -664.7999877929688, + "loss": 0.427099609375, + "memory(GiB)": 57.66, + "nll_loss": 0.42656248807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.162500381469727, + "rewards/margins": 11.362500190734863, + "rewards/rejected": 
-0.18632812798023224, + "step": 45, + "train_speed(iter/s)": 0.078826 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.43416852815625473, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.78125, + "logits/rejected": -1.8156249523162842, + "logps/chosen": -489.20001220703125, + "logps/rejected": -612.7999877929688, + "loss": 0.472528076171875, + "memory(GiB)": 57.66, + "nll_loss": 0.47265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.875, + "rewards/margins": 11.850000381469727, + "rewards/rejected": 0.03691406175494194, + "step": 50, + "train_speed(iter/s)": 0.079352 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.8046875, + "eval_logits/rejected": -1.3125, + "eval_logps/chosen": -132.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.2861328125, + "eval_nll_loss": 0.28515625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.75, + "eval_rewards/margins": 8.9375, + "eval_rewards/rejected": 1.8515625, + "eval_runtime": 4.3892, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.159354970669322, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.7265625, + "logps/chosen": -466.3999938964844, + "logps/rejected": -571.2000122070312, + "loss": 0.454266357421875, + "memory(GiB)": 57.66, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.75, + "rewards/margins": 12.324999809265137, + "rewards/rejected": 0.4175781309604645, + "step": 55, + "train_speed(iter/s)": 0.079883 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13916645187817775, + "learning_rate": 5e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6640625, + "logps/chosen": -506.3999938964844, + "logps/rejected": -442.0, + "loss": 0.41783447265625, + "memory(GiB)": 57.66, + "nll_loss": 0.41796875, + "rewards/accuracies": 1.0, + "rewards/chosen": 
13.387499809265137, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.296875, + "step": 60, + "train_speed(iter/s)": 0.080545 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.28076171875, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.875, + "eval_rewards/rejected": 2.15625, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.24276457284222902, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.740625023841858, + "logits/rejected": -1.671875, + "logps/chosen": -445.6000061035156, + "logps/rejected": -494.3999938964844, + "loss": 0.3975799560546875, + "memory(GiB)": 57.66, + "nll_loss": 0.39726561307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.153076171875, + "step": 65, + "train_speed(iter/s)": 0.080487 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2570014131550182, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.696874976158142, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.20001220703125, + "logps/rejected": -592.4000244140625, + "loss": 0.4327178955078125, + "memory(GiB)": 57.66, + "nll_loss": 0.4320312440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.399999618530273, + "rewards/margins": 13.337499618530273, + "rewards/rejected": 0.05312500149011612, + "step": 70, + "train_speed(iter/s)": 0.080636 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.278564453125, + "eval_nll_loss": 0.279296875, + 
"eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 9.25, + "eval_rewards/rejected": 1.75, + "eval_runtime": 4.4055, + "eval_samples_per_second": 0.908, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.42633298601984926, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.609375, + "logps/chosen": -497.6000061035156, + "logps/rejected": -637.2000122070312, + "loss": 0.463232421875, + "memory(GiB)": 57.66, + "nll_loss": 0.46367186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.300000190734863, + "rewards/margins": 13.5625, + "rewards/rejected": -0.259765625, + "step": 75, + "train_speed(iter/s)": 0.08049 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3488843908324041, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.7218749523162842, + "logits/rejected": -1.642187476158142, + "logps/chosen": -427.20001220703125, + "logps/rejected": -584.7999877929688, + "loss": 0.45631103515625, + "memory(GiB)": 57.66, + "nll_loss": 0.4730468690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 13.762499809265137, + "rewards/rejected": -0.6175781488418579, + "step": 80, + "train_speed(iter/s)": 0.080085 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.27880859375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 9.8125, + "eval_rewards/rejected": 1.25, + "eval_runtime": 4.4925, + "eval_samples_per_second": 0.89, + "eval_steps_per_second": 0.223, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.37174945064044734, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.642187476158142, + "logits/rejected": 
-1.634374976158142, + "logps/chosen": -380.79998779296875, + "logps/rejected": -698.0, + "loss": 0.3918304443359375, + "memory(GiB)": 57.66, + "nll_loss": 0.3910156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.0625, + "rewards/margins": 12.800000190734863, + "rewards/rejected": 0.25874024629592896, + "step": 85, + "train_speed(iter/s)": 0.08009 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21082412866946396, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.623437523841858, + "logits/rejected": -1.631250023841858, + "logps/chosen": -481.6000061035156, + "logps/rejected": -547.5999755859375, + "loss": 0.42437286376953126, + "memory(GiB)": 57.66, + "nll_loss": 0.4242187440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.662500381469727, + "rewards/margins": 14.637499809265137, + "rewards/rejected": 0.0283203125, + "step": 90, + "train_speed(iter/s)": 0.08076 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 9.9375, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3984, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.14903932297946823, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.6593749523162842, + "logps/chosen": -511.6000061035156, + "logps/rejected": -677.2000122070312, + "loss": 0.4473388671875, + "memory(GiB)": 57.66, + "nll_loss": 0.447265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.737500190734863, + "rewards/margins": 15.587499618530273, + "rewards/rejected": -0.856249988079071, + "step": 95, + "train_speed(iter/s)": 0.080249 + }, + { + "epoch": 2.6315789473684212, + 
"grad_norm": 0.2676971755335469, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.639062523841858, + "logps/chosen": -397.0, + "logps/rejected": -521.5999755859375, + "loss": 0.393194580078125, + "memory(GiB)": 66.91, + "nll_loss": 0.39335936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 14.0625, + "rewards/rejected": -0.9228515625, + "step": 100, + "train_speed(iter/s)": 0.080703 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 9.9375, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.4148, + "eval_samples_per_second": 0.906, + "eval_steps_per_second": 0.227, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.4207516520255958, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.6687500476837158, + "logits/rejected": -1.7062499523162842, + "logps/chosen": -429.6499938964844, + "logps/rejected": -526.4000244140625, + "loss": 0.4191162109375, + "memory(GiB)": 66.91, + "nll_loss": 0.41874998807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.762499809265137, + "rewards/margins": 14.225000381469727, + "rewards/rejected": -0.47050780057907104, + "step": 105, + "train_speed(iter/s)": 0.080527 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.3068828002596679, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.693750023841858, + "logits/rejected": -1.6875, + "logps/chosen": -446.79998779296875, + "logps/rejected": -566.4000244140625, + "loss": 0.380780029296875, + "memory(GiB)": 66.91, + "nll_loss": 0.38066405057907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.350000381469727, + "rewards/margins": 
15.162500381469727, + "rewards/rejected": -0.801953136920929, + "step": 110, + "train_speed(iter/s)": 0.0807 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.27880859375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 10.125, + "eval_rewards/rejected": 1.0, + "eval_runtime": 4.4089, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 110 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 463155319799808.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d780db9cce62c6afec5a42fb7061df032125452d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c680252e14d1d2e99b069fe72abd40b3680b9a4c5bfe6e86b72afd7192436af0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-110/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + 
+ + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..913db4c1f4c80fd71c1f6a73bf55b94a07b79467 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "q_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbfeb41f8cf7c8f543f3220bf7854b91a7f13cd5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e99591a25f4edb17fba1802a4ae98844bce0183e6de5607a3eb94a15561a23a +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/args.json new file mode 100644 
index 0000000000000000000000000000000000000000..050fd8222794164e398a2b20e0ba5c0f3e7fcbc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": 
[], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ab173c589991d2695cd9f9b5009adde3408194a7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d1885c9d3bdb4c1a2b93a63a4d640a2c8de1f0eb115743196563a3f14310c4 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..27fcc9d4946eb946a683449bced9b6c1ac776d59 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61e7ff5720bae11c8d0d90cdcb6f04e5ed236828bdde6685b6377139409d551 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9136541d5ae49bae62c81d924f5c9092cbfbb17f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:251647718fe5fb3f9ed6729f28b3abc294242de0e56547dd1be17c6ab37ef53e +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c81709358479f4f2bf738df473e4f4860a69e0fc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241ce6640a55566961a16f2a0468d2c86cc3c3f547047e75c658495266cf1639 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c45eb627f357aacdd00ee30832c1438a39cba07 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00e8185be6feedc778d81e09abe320dc7080e6a3ef29363961bf4babcf11b74 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f532afb60d0bacc43ffa36d0dbd799df946e3fc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5faabb5cd8bc7d531414c229d89bbf483ff2eccf3cd654e266d748fcaebc7e7 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43734ad97cdae4167e64b31f9bd95828c3bababb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f952afa1a27ae7d9d8dd16312280d71b2d2da523b4a78b2f50dc34816eaa661e +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7816b9808fefe939f86129db0c9930a39962c57e --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b08a4493680c185f7a8e8d031397a937c6152bb0f74dd01c83a2513b59846a +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53dc9a6bcc4e6233069b7970c0820e8435e16813 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54095c99836ae030118e2e030bc7eea5f651d02e644a0a76212771adf9d43fb6 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc50b2a8cc153aaccde56f64d7b0e4c72ff0ba09 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38c8e755f3e36ce3faa3282733c9a7713b4e9fb6b7c76449877650009b7273b2 +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfa3ae9fa4b8746734022a7e3ed29ee5ba3143da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a9739ed8863b740a5e9414e826780073db7ac9bc6d0a35f9c61f85e9df80f7 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc0f8ac127697dc96736d7d6d1d41156f9e4f29a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473f65edb4ddb09b0c01b52e7029aa922171edbf07c0e70628012b5f5bc1bd7c +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3ac70ad5d11dd3e2a380903dcd5cbb7b3cad9f69 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a0af1b4843d25f0e5fae735a8996e55d4c657bcd930240e80786f2ba0f694e +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..072e23fe09240f540de4effed0c74b58be373e17 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd5a00ccd32301ab09ca14e493b34750056e34c8c69eb1403fab1bd6efc836a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4298ea5ce265d5347a67f5d5b5f8940a76d317be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a8c938ce4ac141fdc6d26f69e85373475e7726d2f5fb964586cc8016d96d1710 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d98961c8fe7a7d1b4c8d1b0fcad732077dd62614 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ed8f3af49e6ad29ddd7590f0827044c41016e508ba568599cc7081af5dc423 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/latest new file mode 100644 index 0000000000000000000000000000000000000000..aad80f76777fd4d23b0b81026f4601524335cbe1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/latest @@ -0,0 +1 @@ +global_step114 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..80f51268a9828e9592a20d8ae8b2cd4ba4bc362c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_0.pth @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..62e581603d525611f9660b6e859462f72bbc9258 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..18b03e632222a58e33ea4fca874b9c52628cc5e1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..41735b5f7ace13ffa57ebed3e7042f1a48ac17fb --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebbb93c1d99b1645075ea27fc9fae66992a691f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0656f84b77a33c9ceba9df16f36437b55ef71bc7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..32b3a313372ee4a2eeaeed69789f8fb4e2c70ad0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c5a45264129fe1d7c409a6867de1a9751476a8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a86ac614a477eb67963adb2c8c07f37c79ded059 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7a9fd18bda7faa50931342147a7de5605bed0f91f6c70d821e84b7bf8f444f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ebc98480c5730fe2ffca04959581a1b5ee28e504 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/trainer_state.json @@ -0,0 +1,651 @@ +{ + "best_metric": 0.27758789, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.7678369137832615, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.061081 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.77482904203973, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.63671875, + "logps/chosen": -676.25, + "logps/rejected": -514.0, + "loss": 1.84375, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.0674591064453125, + "rewards/margins": 0.08127593994140625, + "rewards/rejected": -0.01410675048828125, + "step": 5, + "train_speed(iter/s)": 0.081107 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.776080083637143, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.506250023841858, + "logits/rejected": -1.532812476158142, + "logps/chosen": -573.2000122070312, 
+ "logps/rejected": -644.0, + "loss": 1.8857421875, + "memory(GiB)": 56.21, + "nll_loss": 1.321874976158142, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.605664074420929, + "rewards/margins": 0.3480468690395355, + "rewards/rejected": 0.25761717557907104, + "step": 10, + "train_speed(iter/s)": 0.078746 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.7958984375, + "eval_nll_loss": 0.55859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.703125, + "eval_rewards/margins": 1.3515625, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.4381, + "eval_samples_per_second": 0.901, + "eval_steps_per_second": 0.225, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.4823344294382115, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.6171875, + "logps/chosen": -602.0, + "logps/rejected": -516.7999877929688, + "loss": 1.065966796875, + "memory(GiB)": 56.21, + "nll_loss": 0.7718750238418579, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.328125, + "rewards/margins": 1.8703124523162842, + "rewards/rejected": 1.462499976158142, + "step": 15, + "train_speed(iter/s)": 0.080005 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7039423711093902, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6484375, + "logits/rejected": -1.6218750476837158, + "logps/chosen": -375.20001220703125, + "logps/rejected": -528.0, + "loss": 0.796923828125, + "memory(GiB)": 56.21, + "nll_loss": 0.6761718988418579, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.800000190734863, + "rewards/margins": 3.1968750953674316, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.08091 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": 
-1.2890625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3564453125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.125, + "eval_rewards/margins": 4.21875, + "eval_rewards/rejected": 4.90625, + "eval_runtime": 4.4028, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.076790008093808, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.654687523841858, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -546.0, + "logps/rejected": -647.0, + "loss": 0.5287841796875, + "memory(GiB)": 56.21, + "nll_loss": 0.518359363079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.699999809265137, + "rewards/margins": 6.859375, + "rewards/rejected": 0.8388671875, + "step": 25, + "train_speed(iter/s)": 0.079793 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.797995818648193, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.654687523841858, + "logps/chosen": -400.79998779296875, + "logps/rejected": -590.7999877929688, + "loss": 0.4728271484375, + "memory(GiB)": 56.21, + "nll_loss": 0.4722656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5, + "rewards/margins": 9.725000381469727, + "rewards/rejected": -1.237695336341858, + "step": 30, + "train_speed(iter/s)": 0.080036 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.71875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -140.0, + "eval_logps/rejected": -1080.0, + "eval_loss": 0.302978515625, + "eval_nll_loss": 0.302734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.0, + "eval_rewards/margins": 10.25, + "eval_rewards/rejected": -0.30078125, + "eval_runtime": 4.4097, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.4446633571773274, + 
"learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.71875, + "logits/rejected": -1.765625, + "logps/chosen": -614.0, + "logps/rejected": -618.4000244140625, + "loss": 0.537164306640625, + "memory(GiB)": 56.21, + "nll_loss": 0.53515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.137499809265137, + "rewards/margins": 9.306249618530273, + "rewards/rejected": -0.17939452826976776, + "step": 35, + "train_speed(iter/s)": 0.079077 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7264397691155322, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7390625476837158, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.5081787109375, + "memory(GiB)": 56.21, + "nll_loss": 0.563671886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.137499809265137, + "rewards/margins": 10.050000190734863, + "rewards/rejected": 0.096435546875, + "step": 40, + "train_speed(iter/s)": 0.079662 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7421875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -136.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.2939453125, + "eval_nll_loss": 0.29296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.375, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.1015625, + "eval_runtime": 4.4344, + "eval_samples_per_second": 0.902, + "eval_steps_per_second": 0.226, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.4761790027876643, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.610937476158142, + "logps/chosen": -440.6000061035156, + "logps/rejected": -664.7999877929688, + "loss": 0.427099609375, + "memory(GiB)": 57.66, + "nll_loss": 0.42656248807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.162500381469727, + "rewards/margins": 11.362500190734863, + "rewards/rejected": -0.18632812798023224, + 
"step": 45, + "train_speed(iter/s)": 0.078826 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.43416852815625473, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.78125, + "logits/rejected": -1.8156249523162842, + "logps/chosen": -489.20001220703125, + "logps/rejected": -612.7999877929688, + "loss": 0.472528076171875, + "memory(GiB)": 57.66, + "nll_loss": 0.47265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.875, + "rewards/margins": 11.850000381469727, + "rewards/rejected": 0.03691406175494194, + "step": 50, + "train_speed(iter/s)": 0.079352 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.8046875, + "eval_logits/rejected": -1.3125, + "eval_logps/chosen": -132.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.2861328125, + "eval_nll_loss": 0.28515625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.75, + "eval_rewards/margins": 8.9375, + "eval_rewards/rejected": 1.8515625, + "eval_runtime": 4.3892, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.159354970669322, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.7265625, + "logps/chosen": -466.3999938964844, + "logps/rejected": -571.2000122070312, + "loss": 0.454266357421875, + "memory(GiB)": 57.66, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.75, + "rewards/margins": 12.324999809265137, + "rewards/rejected": 0.4175781309604645, + "step": 55, + "train_speed(iter/s)": 0.079883 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13916645187817775, + "learning_rate": 5e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6640625, + "logps/chosen": -506.3999938964844, + "logps/rejected": -442.0, + "loss": 0.41783447265625, + "memory(GiB)": 57.66, + "nll_loss": 0.41796875, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.387499809265137, + 
"rewards/margins": 13.087499618530273, + "rewards/rejected": 0.296875, + "step": 60, + "train_speed(iter/s)": 0.080545 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.28076171875, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.875, + "eval_rewards/rejected": 2.15625, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.24276457284222902, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.740625023841858, + "logits/rejected": -1.671875, + "logps/chosen": -445.6000061035156, + "logps/rejected": -494.3999938964844, + "loss": 0.3975799560546875, + "memory(GiB)": 57.66, + "nll_loss": 0.39726561307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.153076171875, + "step": 65, + "train_speed(iter/s)": 0.080487 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2570014131550182, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.696874976158142, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.20001220703125, + "logps/rejected": -592.4000244140625, + "loss": 0.4327178955078125, + "memory(GiB)": 57.66, + "nll_loss": 0.4320312440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.399999618530273, + "rewards/margins": 13.337499618530273, + "rewards/rejected": 0.05312500149011612, + "step": 70, + "train_speed(iter/s)": 0.080636 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.278564453125, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + 
"eval_rewards/chosen": 11.0, + "eval_rewards/margins": 9.25, + "eval_rewards/rejected": 1.75, + "eval_runtime": 4.4055, + "eval_samples_per_second": 0.908, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.42633298601984926, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.609375, + "logps/chosen": -497.6000061035156, + "logps/rejected": -637.2000122070312, + "loss": 0.463232421875, + "memory(GiB)": 57.66, + "nll_loss": 0.46367186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.300000190734863, + "rewards/margins": 13.5625, + "rewards/rejected": -0.259765625, + "step": 75, + "train_speed(iter/s)": 0.08049 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3488843908324041, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.7218749523162842, + "logits/rejected": -1.642187476158142, + "logps/chosen": -427.20001220703125, + "logps/rejected": -584.7999877929688, + "loss": 0.45631103515625, + "memory(GiB)": 57.66, + "nll_loss": 0.4730468690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 13.762499809265137, + "rewards/rejected": -0.6175781488418579, + "step": 80, + "train_speed(iter/s)": 0.080085 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.27880859375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 9.8125, + "eval_rewards/rejected": 1.25, + "eval_runtime": 4.4925, + "eval_samples_per_second": 0.89, + "eval_steps_per_second": 0.223, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.37174945064044734, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.642187476158142, + "logits/rejected": -1.634374976158142, + "logps/chosen": 
-380.79998779296875, + "logps/rejected": -698.0, + "loss": 0.3918304443359375, + "memory(GiB)": 57.66, + "nll_loss": 0.3910156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.0625, + "rewards/margins": 12.800000190734863, + "rewards/rejected": 0.25874024629592896, + "step": 85, + "train_speed(iter/s)": 0.08009 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21082412866946396, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.623437523841858, + "logits/rejected": -1.631250023841858, + "logps/chosen": -481.6000061035156, + "logps/rejected": -547.5999755859375, + "loss": 0.42437286376953126, + "memory(GiB)": 57.66, + "nll_loss": 0.4242187440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.662500381469727, + "rewards/margins": 14.637499809265137, + "rewards/rejected": 0.0283203125, + "step": 90, + "train_speed(iter/s)": 0.08076 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 9.9375, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3984, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.14903932297946823, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.6593749523162842, + "logps/chosen": -511.6000061035156, + "logps/rejected": -677.2000122070312, + "loss": 0.4473388671875, + "memory(GiB)": 57.66, + "nll_loss": 0.447265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.737500190734863, + "rewards/margins": 15.587499618530273, + "rewards/rejected": -0.856249988079071, + "step": 95, + "train_speed(iter/s)": 0.080249 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.2676971755335469, + 
"learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.639062523841858, + "logps/chosen": -397.0, + "logps/rejected": -521.5999755859375, + "loss": 0.393194580078125, + "memory(GiB)": 66.91, + "nll_loss": 0.39335936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 14.0625, + "rewards/rejected": -0.9228515625, + "step": 100, + "train_speed(iter/s)": 0.080703 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 9.9375, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.4148, + "eval_samples_per_second": 0.906, + "eval_steps_per_second": 0.227, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.4207516520255958, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.6687500476837158, + "logits/rejected": -1.7062499523162842, + "logps/chosen": -429.6499938964844, + "logps/rejected": -526.4000244140625, + "loss": 0.4191162109375, + "memory(GiB)": 66.91, + "nll_loss": 0.41874998807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.762499809265137, + "rewards/margins": 14.225000381469727, + "rewards/rejected": -0.47050780057907104, + "step": 105, + "train_speed(iter/s)": 0.080527 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.3068828002596679, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.693750023841858, + "logits/rejected": -1.6875, + "logps/chosen": -446.79998779296875, + "logps/rejected": -566.4000244140625, + "loss": 0.380780029296875, + "memory(GiB)": 66.91, + "nll_loss": 0.38066405057907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.350000381469727, + "rewards/margins": 15.162500381469727, + "rewards/rejected": 
-0.801953136920929, + "step": 110, + "train_speed(iter/s)": 0.0807 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.27880859375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 10.125, + "eval_rewards/rejected": 1.0, + "eval_runtime": 4.4089, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 110 + }, + { + "epoch": 3.0, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.277587890625, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.1875, + "eval_rewards/margins": 10.1875, + "eval_rewards/rejected": 1.0, + "eval_runtime": 4.3398, + "eval_samples_per_second": 0.922, + "eval_steps_per_second": 0.23, + "step": 114 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 479640253628416.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d780db9cce62c6afec5a42fb7061df032125452d --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c680252e14d1d2e99b069fe72abd40b3680b9a4c5bfe6e86b72afd7192436af0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + 
+ +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..913db4c1f4c80fd71c1f6a73bf55b94a07b79467 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "q_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aff13c1700f9d3b78ef0106a315d3cec50144050 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:326ee8e7df9f6376c204cb5072e2ffd9908410c751bb92d7886d8867100a2c1a +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..050fd8222794164e398a2b20e0ba5c0f3e7fcbc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..84c4fc4f45fc09c388a6be7476b0ed2ba348ce1d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ace534296aa836c4aa1fc4925dd080a583eecbf57a796d0b9be8a3cac909fb +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..532b1c29892cf671664faf4bb5d1d9459da50323 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ffd0d6b890559bc1a06d7aa0b3667957fb021b4be2c9a94870e1da795e3bbc +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7e5e74c7c82edde0118e70bb6a102765bec5a54 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5bb305b82cc36a9965585091b450249497d675eb40dd9a77c6f997326494a8b6 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..842b5ebe20820576c41b0a8c83f6d6bba03bc938 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b9d5b27c382052daadc47ca61fadfed85b7c9d51c11625b3af4646cb0fed4f +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..76024dc3df4b64c20e783846f5d527e84fa3657d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:040cdcc99df47bfc736497af681e4b5c1c46f070061e2c713e31dc9e3e2478f6 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbcc7a5f02cde8528932d8f8fd9cc5250f2dbebb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ec05cacc1c88f289a5a0d759deb52fdca3c8b0e2c842ccc2fc866ba84b9a59 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c46cc43dd2d1223e9f8e3a3318be4f7bee03158b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338553272e5987e08a67fd6ae351ad3fe85ae830f892e22d26f4e8086e71a86d +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b29040d81fbe8e133f1c79ff4cf6bac489a8a4b8 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e15fdadebebcbc61a103bf5f10a4b318d6d9bf79690d6b7b3e6e5f44284330 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..84f6e05655138cb2c183077640365bbdb4d53984 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f36d9b56d77055f4f1455f7dbba9bef4402905acf28686157836702329d45a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d47af3d97f6b0e26c2c92bf7dd281ad184f4dee3 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f7d1f9bbd4a2c78ed38a2087cb38ecdb6e8260c7318a6d76920b2dc1261e32 +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f56de979efa68b215b4eead190912770fe5dea33 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0d43d9513d732b53c7ec45ce13e033bd48cf37dd35e17b71a3b698f960fe37 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dce5eda351453c8a351aa5379b8f519f8291bc38 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e88dd8b692def807027787e5975f23ec8bec98f15ac6f1f2753dccf64c6f98a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..9337d6678ad945148a0b7d566151166d9056770a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81bfc06d39b12eb5f9736867ace7484df1b7f11843c1a3fc7933303d58736a03 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..868b6b65b8b93da7dfb3eb35852eb5baf3cd2715 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746f3dbbe62e70fe59fef4f91064c45a4351929431dad77b3aa9a0fe8ce82df7 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb315f9ade2849d0d5f5d4549949f173d0f7305d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:10a8daf139a8e67152e96758cf6a94bdd58c880092af4299205e60bdd19f283a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8312aa35b8dd8dd914e61d5b20963f3f5a5dce54 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301c209ab202ee9b6b96f1857aa3a9cfcbf8f1b971f20e9f8d21d0ca0f959b93 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/latest new file mode 100644 index 0000000000000000000000000000000000000000..75eab498d0366633484ab40334e4b8fb92b16dad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/latest @@ -0,0 +1 @@ +global_step80 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b8b4067e4559b34f9b554c4963fe80d7f5fe839 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_0.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ba4c26c615bd5830d41566fab54dc69174be292761b34514b27fbe82b45b630b +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0265e51b5761ac9b323aa87ba00ba14b97e202 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c761d7f9b90c29c2d348a1133fd39be52c65e6bee4c2d179f6a6e564eb3a40 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5dd5aabcd6e7332f14a4796d6ec6c758e10aea0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccab847cc956e055fd3f9dcce06898826d065211e945b83576c8d487f87c5469 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcbdea3a573c2b7717f23e2ea0e4a6da6670d65d --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e5f1dbdcf6ec820c22fd1e4258fcd7af2a2bce65c480988d3f111aa574c9c06 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cdcb8d1710063a6c30dec635b4c44e3cb6cd24e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a23184c3e806d2649776427d1da2c0c9137f9b23a84468f3bdd5bbc75f696c9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e39323a662c284cd109b5ce8c39e8a0ce375f2c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382fc01b809542bf6f5e26742e3e19e80a1f189ac5de24cf8cd822e303916b83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..1ce685d2e57181f70debfb25eb90cb76ceaf47da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b178265c7d2ae07bff10b7312e5e49b9f5b4914c38969d2f64a6ca006296bca +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e363b8083cdd817e0b3a2e6fd1b65a905e189b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668825a859126c4cf32afb883895c91004130b6aee02178736ca2840e5429ad0 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaf96d6803aea265d756d902db3c4cc2386f9742 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90524bcdb94734ac7120e4205110f14662bff8cee00eed50355875dcdc538029 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c4eb53e6f965a85c211898ba7f397b950376e498 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/trainer_state.json @@ -0,0 +1,475 @@ +{ + "best_metric": 0.27856445, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-70", + "epoch": 2.1052631578947367, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.7678369137832615, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.061081 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.77482904203973, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.63671875, + "logps/chosen": -676.25, + "logps/rejected": -514.0, + "loss": 1.84375, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.0674591064453125, + "rewards/margins": 0.08127593994140625, + "rewards/rejected": -0.01410675048828125, + "step": 5, + "train_speed(iter/s)": 0.081107 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.776080083637143, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.506250023841858, + "logits/rejected": -1.532812476158142, + "logps/chosen": 
-573.2000122070312, + "logps/rejected": -644.0, + "loss": 1.8857421875, + "memory(GiB)": 56.21, + "nll_loss": 1.321874976158142, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.605664074420929, + "rewards/margins": 0.3480468690395355, + "rewards/rejected": 0.25761717557907104, + "step": 10, + "train_speed(iter/s)": 0.078746 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.7958984375, + "eval_nll_loss": 0.55859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.703125, + "eval_rewards/margins": 1.3515625, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.4381, + "eval_samples_per_second": 0.901, + "eval_steps_per_second": 0.225, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.4823344294382115, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.6171875, + "logps/chosen": -602.0, + "logps/rejected": -516.7999877929688, + "loss": 1.065966796875, + "memory(GiB)": 56.21, + "nll_loss": 0.7718750238418579, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.328125, + "rewards/margins": 1.8703124523162842, + "rewards/rejected": 1.462499976158142, + "step": 15, + "train_speed(iter/s)": 0.080005 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7039423711093902, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6484375, + "logits/rejected": -1.6218750476837158, + "logps/chosen": -375.20001220703125, + "logps/rejected": -528.0, + "loss": 0.796923828125, + "memory(GiB)": 56.21, + "nll_loss": 0.6761718988418579, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.800000190734863, + "rewards/margins": 3.1968750953674316, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.08091 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + 
"eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3564453125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.125, + "eval_rewards/margins": 4.21875, + "eval_rewards/rejected": 4.90625, + "eval_runtime": 4.4028, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.076790008093808, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.654687523841858, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -546.0, + "logps/rejected": -647.0, + "loss": 0.5287841796875, + "memory(GiB)": 56.21, + "nll_loss": 0.518359363079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.699999809265137, + "rewards/margins": 6.859375, + "rewards/rejected": 0.8388671875, + "step": 25, + "train_speed(iter/s)": 0.079793 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.797995818648193, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.654687523841858, + "logps/chosen": -400.79998779296875, + "logps/rejected": -590.7999877929688, + "loss": 0.4728271484375, + "memory(GiB)": 56.21, + "nll_loss": 0.4722656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5, + "rewards/margins": 9.725000381469727, + "rewards/rejected": -1.237695336341858, + "step": 30, + "train_speed(iter/s)": 0.080036 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.71875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -140.0, + "eval_logps/rejected": -1080.0, + "eval_loss": 0.302978515625, + "eval_nll_loss": 0.302734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.0, + "eval_rewards/margins": 10.25, + "eval_rewards/rejected": -0.30078125, + "eval_runtime": 4.4097, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 
0.4446633571773274, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.71875, + "logits/rejected": -1.765625, + "logps/chosen": -614.0, + "logps/rejected": -618.4000244140625, + "loss": 0.537164306640625, + "memory(GiB)": 56.21, + "nll_loss": 0.53515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.137499809265137, + "rewards/margins": 9.306249618530273, + "rewards/rejected": -0.17939452826976776, + "step": 35, + "train_speed(iter/s)": 0.079077 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7264397691155322, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7390625476837158, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.5081787109375, + "memory(GiB)": 56.21, + "nll_loss": 0.563671886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.137499809265137, + "rewards/margins": 10.050000190734863, + "rewards/rejected": 0.096435546875, + "step": 40, + "train_speed(iter/s)": 0.079662 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7421875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -136.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.2939453125, + "eval_nll_loss": 0.29296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.375, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.1015625, + "eval_runtime": 4.4344, + "eval_samples_per_second": 0.902, + "eval_steps_per_second": 0.226, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.4761790027876643, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.610937476158142, + "logps/chosen": -440.6000061035156, + "logps/rejected": -664.7999877929688, + "loss": 0.427099609375, + "memory(GiB)": 57.66, + "nll_loss": 0.42656248807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.162500381469727, + "rewards/margins": 11.362500190734863, + "rewards/rejected": 
-0.18632812798023224, + "step": 45, + "train_speed(iter/s)": 0.078826 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.43416852815625473, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.78125, + "logits/rejected": -1.8156249523162842, + "logps/chosen": -489.20001220703125, + "logps/rejected": -612.7999877929688, + "loss": 0.472528076171875, + "memory(GiB)": 57.66, + "nll_loss": 0.47265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.875, + "rewards/margins": 11.850000381469727, + "rewards/rejected": 0.03691406175494194, + "step": 50, + "train_speed(iter/s)": 0.079352 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.8046875, + "eval_logits/rejected": -1.3125, + "eval_logps/chosen": -132.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.2861328125, + "eval_nll_loss": 0.28515625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.75, + "eval_rewards/margins": 8.9375, + "eval_rewards/rejected": 1.8515625, + "eval_runtime": 4.3892, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.159354970669322, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.7265625, + "logps/chosen": -466.3999938964844, + "logps/rejected": -571.2000122070312, + "loss": 0.454266357421875, + "memory(GiB)": 57.66, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.75, + "rewards/margins": 12.324999809265137, + "rewards/rejected": 0.4175781309604645, + "step": 55, + "train_speed(iter/s)": 0.079883 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13916645187817775, + "learning_rate": 5e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6640625, + "logps/chosen": -506.3999938964844, + "logps/rejected": -442.0, + "loss": 0.41783447265625, + "memory(GiB)": 57.66, + "nll_loss": 0.41796875, + "rewards/accuracies": 1.0, + "rewards/chosen": 
13.387499809265137, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.296875, + "step": 60, + "train_speed(iter/s)": 0.080545 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.28076171875, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.875, + "eval_rewards/rejected": 2.15625, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.24276457284222902, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.740625023841858, + "logits/rejected": -1.671875, + "logps/chosen": -445.6000061035156, + "logps/rejected": -494.3999938964844, + "loss": 0.3975799560546875, + "memory(GiB)": 57.66, + "nll_loss": 0.39726561307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.153076171875, + "step": 65, + "train_speed(iter/s)": 0.080487 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2570014131550182, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.696874976158142, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.20001220703125, + "logps/rejected": -592.4000244140625, + "loss": 0.4327178955078125, + "memory(GiB)": 57.66, + "nll_loss": 0.4320312440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.399999618530273, + "rewards/margins": 13.337499618530273, + "rewards/rejected": 0.05312500149011612, + "step": 70, + "train_speed(iter/s)": 0.080636 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.278564453125, + "eval_nll_loss": 0.279296875, + 
"eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 9.25, + "eval_rewards/rejected": 1.75, + "eval_runtime": 4.4055, + "eval_samples_per_second": 0.908, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.42633298601984926, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.609375, + "logps/chosen": -497.6000061035156, + "logps/rejected": -637.2000122070312, + "loss": 0.463232421875, + "memory(GiB)": 57.66, + "nll_loss": 0.46367186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.300000190734863, + "rewards/margins": 13.5625, + "rewards/rejected": -0.259765625, + "step": 75, + "train_speed(iter/s)": 0.08049 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3488843908324041, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.7218749523162842, + "logits/rejected": -1.642187476158142, + "logps/chosen": -427.20001220703125, + "logps/rejected": -584.7999877929688, + "loss": 0.45631103515625, + "memory(GiB)": 57.66, + "nll_loss": 0.4730468690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 13.762499809265137, + "rewards/rejected": -0.6175781488418579, + "step": 80, + "train_speed(iter/s)": 0.080085 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.27880859375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 9.8125, + "eval_rewards/rejected": 1.25, + "eval_runtime": 4.4925, + "eval_samples_per_second": 0.89, + "eval_steps_per_second": 0.223, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": 
{ + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 338904851939328.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d780db9cce62c6afec5a42fb7061df032125452d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c680252e14d1d2e99b069fe72abd40b3680b9a4c5bfe6e86b72afd7192436af0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-80/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . 
output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + 
ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def 
parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + 
+ +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..913db4c1f4c80fd71c1f6a73bf55b94a07b79467 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "q_proj", + "gate_proj", + "o_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fd44567d2b88603026c33737e8ab8118b9cc606 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b657b604bcf7c01c70ea8c8350a97567a5e65dc475861a0b34b836a44c171db7 +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..050fd8222794164e398a2b20e0ba5c0f3e7fcbc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..7e8b90e8f828f5c33f59614a703eece2ea72d88b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79a8e103d8b2c685aafd4f05f8e4993bc0dafa9e6563014db7ab3ad04ccdad0 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0300b89f524ea4707853862665717ab6960d735d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690a43858555706e831de234c27d54e221c6c1e5e19ad1ccb10bdac596d4e5ca +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f44f8f9fd8b530ba7beb9b6b6b820052c6125107 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ba09a9108a853487e8c09149c0694bf28af4eedeb28368d1a0ebf33ea239a751 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e17ab90ed2d7ca9bc1e54b4f2f2494b923dc6694 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6a26454d52934a9823c7e371d73770a24693c7d526be825f9811ed8c2975ed +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3b212cb6d4b4db9a732d7910584ee610281afa1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e6cbaee04932e6493babf17f83f0ddf69557907a281bbb16f5369e4df2b487 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..81063daafb8bf11ec206c8abddd5652d23388f5a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c05cdea39c3efc28c54b52b39dfbcb5a06b641c6a70a3fda99eccb1e354a134 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b8f2b40a119e645193ab54fec386a0e6b5b260b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f6a5b8352521ebc7b9ff9010a6f4258615d7fd1ac5b5e355f8eb85d53a477e +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4834aabf1ee01a9dee977205d19c70083dd3bd8 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a6eadf1f0c3f31ce286739c32d698358fc8ec625b1615bb5afaaefcbca9f1e +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..62f872fb1d9aaed33a4fc71ee48afe248b499c66 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80390aca50881c077877400c9645fa6442f4ef0b2e7430e27ccbdfee4a7db767 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..86b7237e11d68131a1a2899bb224f927200e26c8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d300157f4c0b0093f99ef18c13feb7498361fe9b3740cedb495b0a69173b48c +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ff5bd63257d675e27ed07007b1452710554b01f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad8c30ed96b214a13f8a0e9c4a41e4e41be0cace28c54dd5e156db1a1d2868d +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2f8b1d8f0a2b705106db457027badd4e5f6ed53 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9c95184ba528a64e814f49fd06a9b5b171eb4fe3c2dd62b7a5667c77d768fe +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b80039a193b53c0577c8ecb6e47c56b6bacd16a8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b0a9b5b40b7d1b6b7196af3d9c6fe16600aca003357962d9aeee37da034957 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8367f14b958ba2bde1d2b1fc90f1a7459fee1197 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e152f646099d4ccf09689480b9ba7d5ccee1fc1e0958521d2452833758425a6 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3f5fea75edfa3da191b52baaea356a1f300d8d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5801812ee46be8ba79ddc3a15a07734175946500058cbdb7003f35840949419a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..275ede459c9fabbc1e293a34c4f62f03a57b427a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56969a6801036568d5a89aa1d69876922e96bed310078bbf8b1cded5118ce322 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/latest new file mode 100644 index 0000000000000000000000000000000000000000..8e7a337e2cb23bf07023d223dd647df2d25f0fc1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/latest @@ -0,0 +1 @@ +global_step90 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e31a2394e12bf431ae13288c3d90fe4727f07fa7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_0.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1db0a0f44aa3ac1d82c3bf8dc2d8968eeba4ce7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..75de18f57a056bd6a5f89df1abd045678f3f919e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd100693bc9f3267d044ce4a16e702502dc03ec --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5aeeabfe119f1cb0c8c804f1b9a4d3049f478d69 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..91fe0f42382ab06f4d26d753745a914c9e46100e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..5830ca6bd04645962b6e56a00a91cd8349ca449c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..343d1c0475f0dc64100dc67b09195e047f1a7bcf --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a39c9cfeaa2d69cb5a66e83272eee65ddffaed5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b768777109679597db5d1fa24a743962bede33623e22702b13b95eab2d42cb8 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4ddc0644b0980a508913f8ff7d367ba59b015212 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/trainer_state.json @@ -0,0 +1,528 @@ +{ + "best_metric": 0.27856445, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-70", + "epoch": 2.3684210526315788, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.7678369137832615, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.061081 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.77482904203973, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.63671875, + "logps/chosen": -676.25, + "logps/rejected": -514.0, + "loss": 1.84375, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.0674591064453125, + "rewards/margins": 0.08127593994140625, + "rewards/rejected": -0.01410675048828125, + "step": 5, + "train_speed(iter/s)": 0.081107 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.776080083637143, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.506250023841858, + "logits/rejected": -1.532812476158142, + "logps/chosen": 
-573.2000122070312, + "logps/rejected": -644.0, + "loss": 1.8857421875, + "memory(GiB)": 56.21, + "nll_loss": 1.321874976158142, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.605664074420929, + "rewards/margins": 0.3480468690395355, + "rewards/rejected": 0.25761717557907104, + "step": 10, + "train_speed(iter/s)": 0.078746 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.7958984375, + "eval_nll_loss": 0.55859375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.703125, + "eval_rewards/margins": 1.3515625, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.4381, + "eval_samples_per_second": 0.901, + "eval_steps_per_second": 0.225, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.4823344294382115, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.579687476158142, + "logits/rejected": -1.6171875, + "logps/chosen": -602.0, + "logps/rejected": -516.7999877929688, + "loss": 1.065966796875, + "memory(GiB)": 56.21, + "nll_loss": 0.7718750238418579, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.328125, + "rewards/margins": 1.8703124523162842, + "rewards/rejected": 1.462499976158142, + "step": 15, + "train_speed(iter/s)": 0.080005 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7039423711093902, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.6484375, + "logits/rejected": -1.6218750476837158, + "logps/chosen": -375.20001220703125, + "logps/rejected": -528.0, + "loss": 0.796923828125, + "memory(GiB)": 56.21, + "nll_loss": 0.6761718988418579, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.800000190734863, + "rewards/margins": 3.1968750953674316, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.08091 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + 
"eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -149.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3564453125, + "eval_nll_loss": 0.328125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.125, + "eval_rewards/margins": 4.21875, + "eval_rewards/rejected": 4.90625, + "eval_runtime": 4.4028, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.076790008093808, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.654687523841858, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -546.0, + "logps/rejected": -647.0, + "loss": 0.5287841796875, + "memory(GiB)": 56.21, + "nll_loss": 0.518359363079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.699999809265137, + "rewards/margins": 6.859375, + "rewards/rejected": 0.8388671875, + "step": 25, + "train_speed(iter/s)": 0.079793 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.797995818648193, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.654687523841858, + "logps/chosen": -400.79998779296875, + "logps/rejected": -590.7999877929688, + "loss": 0.4728271484375, + "memory(GiB)": 56.21, + "nll_loss": 0.4722656309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5, + "rewards/margins": 9.725000381469727, + "rewards/rejected": -1.237695336341858, + "step": 30, + "train_speed(iter/s)": 0.080036 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.71875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -140.0, + "eval_logps/rejected": -1080.0, + "eval_loss": 0.302978515625, + "eval_nll_loss": 0.302734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.0, + "eval_rewards/margins": 10.25, + "eval_rewards/rejected": -0.30078125, + "eval_runtime": 4.4097, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 
0.4446633571773274, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.71875, + "logits/rejected": -1.765625, + "logps/chosen": -614.0, + "logps/rejected": -618.4000244140625, + "loss": 0.537164306640625, + "memory(GiB)": 56.21, + "nll_loss": 0.53515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.137499809265137, + "rewards/margins": 9.306249618530273, + "rewards/rejected": -0.17939452826976776, + "step": 35, + "train_speed(iter/s)": 0.079077 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.7264397691155322, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.6843750476837158, + "logits/rejected": -1.7390625476837158, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.5081787109375, + "memory(GiB)": 56.21, + "nll_loss": 0.563671886920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.137499809265137, + "rewards/margins": 10.050000190734863, + "rewards/rejected": 0.096435546875, + "step": 40, + "train_speed(iter/s)": 0.079662 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7421875, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -136.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.2939453125, + "eval_nll_loss": 0.29296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.375, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.1015625, + "eval_runtime": 4.4344, + "eval_samples_per_second": 0.902, + "eval_steps_per_second": 0.226, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.4761790027876643, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.6921875476837158, + "logits/rejected": -1.610937476158142, + "logps/chosen": -440.6000061035156, + "logps/rejected": -664.7999877929688, + "loss": 0.427099609375, + "memory(GiB)": 57.66, + "nll_loss": 0.42656248807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.162500381469727, + "rewards/margins": 11.362500190734863, + "rewards/rejected": 
-0.18632812798023224, + "step": 45, + "train_speed(iter/s)": 0.078826 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.43416852815625473, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.78125, + "logits/rejected": -1.8156249523162842, + "logps/chosen": -489.20001220703125, + "logps/rejected": -612.7999877929688, + "loss": 0.472528076171875, + "memory(GiB)": 57.66, + "nll_loss": 0.47265625, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.875, + "rewards/margins": 11.850000381469727, + "rewards/rejected": 0.03691406175494194, + "step": 50, + "train_speed(iter/s)": 0.079352 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.8046875, + "eval_logits/rejected": -1.3125, + "eval_logps/chosen": -132.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.2861328125, + "eval_nll_loss": 0.28515625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.75, + "eval_rewards/margins": 8.9375, + "eval_rewards/rejected": 1.8515625, + "eval_runtime": 4.3892, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.159354970669322, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.7265625, + "logps/chosen": -466.3999938964844, + "logps/rejected": -571.2000122070312, + "loss": 0.454266357421875, + "memory(GiB)": 57.66, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.75, + "rewards/margins": 12.324999809265137, + "rewards/rejected": 0.4175781309604645, + "step": 55, + "train_speed(iter/s)": 0.079883 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13916645187817775, + "learning_rate": 5e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6640625, + "logps/chosen": -506.3999938964844, + "logps/rejected": -442.0, + "loss": 0.41783447265625, + "memory(GiB)": 57.66, + "nll_loss": 0.41796875, + "rewards/accuracies": 1.0, + "rewards/chosen": 
13.387499809265137, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.296875, + "step": 60, + "train_speed(iter/s)": 0.080545 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.28076171875, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.875, + "eval_rewards/rejected": 2.15625, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.24276457284222902, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.740625023841858, + "logits/rejected": -1.671875, + "logps/chosen": -445.6000061035156, + "logps/rejected": -494.3999938964844, + "loss": 0.3975799560546875, + "memory(GiB)": 57.66, + "nll_loss": 0.39726561307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 13.087499618530273, + "rewards/rejected": 0.153076171875, + "step": 65, + "train_speed(iter/s)": 0.080487 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2570014131550182, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.696874976158142, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.20001220703125, + "logps/rejected": -592.4000244140625, + "loss": 0.4327178955078125, + "memory(GiB)": 57.66, + "nll_loss": 0.4320312440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.399999618530273, + "rewards/margins": 13.337499618530273, + "rewards/rejected": 0.05312500149011612, + "step": 70, + "train_speed(iter/s)": 0.080636 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.278564453125, + "eval_nll_loss": 0.279296875, + 
"eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 9.25, + "eval_rewards/rejected": 1.75, + "eval_runtime": 4.4055, + "eval_samples_per_second": 0.908, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.42633298601984926, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.6531250476837158, + "logits/rejected": -1.609375, + "logps/chosen": -497.6000061035156, + "logps/rejected": -637.2000122070312, + "loss": 0.463232421875, + "memory(GiB)": 57.66, + "nll_loss": 0.46367186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.300000190734863, + "rewards/margins": 13.5625, + "rewards/rejected": -0.259765625, + "step": 75, + "train_speed(iter/s)": 0.08049 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3488843908324041, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.7218749523162842, + "logits/rejected": -1.642187476158142, + "logps/chosen": -427.20001220703125, + "logps/rejected": -584.7999877929688, + "loss": 0.45631103515625, + "memory(GiB)": 57.66, + "nll_loss": 0.4730468690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.149999618530273, + "rewards/margins": 13.762499809265137, + "rewards/rejected": -0.6175781488418579, + "step": 80, + "train_speed(iter/s)": 0.080085 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.27880859375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 9.8125, + "eval_rewards/rejected": 1.25, + "eval_runtime": 4.4925, + "eval_samples_per_second": 0.89, + "eval_steps_per_second": 0.223, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.37174945064044734, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.642187476158142, + "logits/rejected": 
-1.634374976158142, + "logps/chosen": -380.79998779296875, + "logps/rejected": -698.0, + "loss": 0.3918304443359375, + "memory(GiB)": 57.66, + "nll_loss": 0.3910156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.0625, + "rewards/margins": 12.800000190734863, + "rewards/rejected": 0.25874024629592896, + "step": 85, + "train_speed(iter/s)": 0.08009 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21082412866946396, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.623437523841858, + "logits/rejected": -1.631250023841858, + "logps/chosen": -481.6000061035156, + "logps/rejected": -547.5999755859375, + "loss": 0.42437286376953126, + "memory(GiB)": 57.66, + "nll_loss": 0.4242187440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.662500381469727, + "rewards/margins": 14.637499809265137, + "rewards/rejected": 0.0283203125, + "step": 90, + "train_speed(iter/s)": 0.08076 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 9.9375, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3984, + "eval_samples_per_second": 0.909, + "eval_steps_per_second": 0.227, + "step": 90 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 378617444958208.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d780db9cce62c6afec5a42fb7061df032125452d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c680252e14d1d2e99b069fe72abd40b3680b9a4c5bfe6e86b72afd7192436af0 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-90/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..7bd75ed55ca598dceb1f3318ca79429098cf080a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..dc8d1f5ce46194b3e55743f7ea3fb8277af0b279 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..265c8ed5f8ad6d0013bfb20e2edb0d630288fe18 Binary files 
/dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..7da5e1088a690a99d4cb9658d6b9b83caa96f8f6 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..80f9f7a079fc5a318437789a965508cb81cc149d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..e71f08a73e0e0cc8eee8eeb47ad187da534f5c6d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_accuracies.png 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..2d2698dd0f6f8ad18db2b94a0c23ae94df643a78 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..91100ccf844de33ed371695148fbc30254019d8e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..6113253243755a671f3c7dfe6a4599dab6d3f125 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..9c82fa78b5ae19466752c245e5f3a9425c4c11b0 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_runtime.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..1b7182a616a68ec13de0f536ec9090fa4d7b02ec Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..6ff74aed9c3912d1bbac710bfe06852dd0f9f41f Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..408d11a00c59dced0fc73c8fdf2d5e9c2a5aeb0e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/eval_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_epoch.png 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_epoch.png new file mode 100644 index 0000000000000000000000000000000000000000..c152c9bbb1100d680125e899d3d0da3db98dc221 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_epoch.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_grad_norm.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_grad_norm.png new file mode 100644 index 0000000000000000000000000000000000000000..c46484eb043a3ec1ccecc0bd0af7df30e2890ecd Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_grad_norm.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_learning_rate.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..e35e42c173f451f9666ba448872c615528441c9e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_learning_rate.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..1048bf3c63d0d0f30d7b47f0255677b1dcbb0d9c Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..67c3aa3f51a7a4e142c99e5ceb9d1f74e7af722f Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..2352a932658549445e1c32ced5eba4b327b12f7e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..995d33b560be6db9aac34a0cc0b3778a87d238d5 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_loss.png 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..0219c4d038fd7f7a87bbefd916d7b146e7cff515 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_memory(GiB).png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_memory(GiB).png new file mode 100644 index 0000000000000000000000000000000000000000..993ca3aa7f19ef8fdc1474131e055874fb5fccdd Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_memory(GiB).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..e04d57063bd1f63043b371cadde6b5bd9e896c3d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..9c67b38e866a988ab8eb3a183735075c1cefe2e3 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..14d4f83f4620e620e28c7a4e84bf242243cd4371 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..443d0e1be515de776c5ddc791621c0c4338b2931 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..d3237d005fd32c75fc3e6b8b3ab619c6dd02cce8 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_total_flos.png 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_total_flos.png new file mode 100644 index 0000000000000000000000000000000000000000..bfc59c9dd1f191a363fcc9b67d0538ab92317d82 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_total_flos.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..06bd447aff18e96549a1cdfd90d23d70d8b7ab7a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_runtime.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..a9e10e0aba2101d51f974c6fd43082438e5ac435 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..90616951ad3fd1e759a728302e189905a17138d9 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_speed(iter_s).png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_speed(iter_s).png new file mode 100644 index 0000000000000000000000000000000000000000..cc73104589e7c35ee2a6937456409160052631d0 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_speed(iter_s).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..5c3b9c717ae2f3cd532c7a2eb9154696900e21e5 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/images/train_train_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/logging.jsonl b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/logging.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..9ccf5d492b4ce93a3352c53e52d4ee2a22bc3750 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/logging.jsonl @@ -0,0 +1,37 @@ +{"loss": 1.27832031, "grad_norm": 4.76783691, "learning_rate": 1.667e-05, "memory(GiB)": 14.31, "train_speed(iter/s)": 0.061081, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, 
"rewards/margins": 0.0, "logps/chosen": -664.0, "logps/rejected": -370.0, "logits/chosen": -1.609375, "logits/rejected": -1.609375, "nll_loss": 0.5859375, "epoch": 0.02631579, "global_step/max_steps": "1/114", "percentage": "0.88%", "elapsed_time": "12s", "remaining_time": "23m 0s"} +{"loss": 1.84375, "grad_norm": 5.77482904, "learning_rate": 8.333e-05, "memory(GiB)": 24.25, "train_speed(iter/s)": 0.081107, "rewards/chosen": 0.06745911, "rewards/rejected": -0.01410675, "rewards/accuracies": 0.3125, "rewards/margins": 0.08127594, "logps/chosen": -676.25, "logps/rejected": -514.0, "logits/chosen": -1.57421875, "logits/rejected": -1.63671875, "nll_loss": 1.18261719, "epoch": 0.13157895, "global_step/max_steps": "5/114", "percentage": "4.39%", "elapsed_time": "57s", "remaining_time": "20m 53s"} +{"loss": 1.88574219, "grad_norm": 4.77608008, "learning_rate": 9.966e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.078746, "rewards/chosen": 0.60566407, "rewards/rejected": 0.25761718, "rewards/accuracies": 0.69999999, "rewards/margins": 0.34804687, "logps/chosen": -573.20001221, "logps/rejected": -644.0, "logits/chosen": -1.50625002, "logits/rejected": -1.53281248, "nll_loss": 1.32187498, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "2m 2s", "remaining_time": "21m 17s"} +{"eval_loss": 0.79589844, "eval_runtime": 4.4381, "eval_samples_per_second": 0.901, "eval_steps_per_second": 0.225, "eval_rewards/chosen": 2.703125, "eval_rewards/rejected": 1.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 1.3515625, "eval_logps/chosen": -213.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.6171875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.55859375, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "2m 7s", "remaining_time": "22m 3s"} +{"loss": 1.0659668, "grad_norm": 2.48233443, "learning_rate": 9.83e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 
0.080005, "rewards/chosen": 3.328125, "rewards/rejected": 1.46249998, "rewards/accuracies": 0.875, "rewards/margins": 1.87031245, "logps/chosen": -602.0, "logps/rejected": -516.79998779, "logits/chosen": -1.57968748, "logits/rejected": -1.6171875, "nll_loss": 0.77187502, "epoch": 0.39473684, "global_step/max_steps": "15/114", "percentage": "13.16%", "elapsed_time": "3m 3s", "remaining_time": "20m 10s"} +{"loss": 0.79692383, "grad_norm": 0.70394237, "learning_rate": 9.591e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.08091, "rewards/chosen": 6.80000019, "rewards/rejected": 3.6031251, "rewards/accuracies": 0.97500002, "rewards/margins": 3.1968751, "logps/chosen": -375.20001221, "logps/rejected": -528.0, "logits/chosen": -1.6484375, "logits/rejected": -1.62187505, "nll_loss": 0.6761719, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "4m 3s", "remaining_time": "19m 2s"} +{"eval_loss": 0.35644531, "eval_runtime": 4.4028, "eval_samples_per_second": 0.909, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 9.125, "eval_rewards/rejected": 4.90625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 4.21875, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1024.0, "eval_logits/chosen": -1.6875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.328125, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "4m 7s", "remaining_time": "19m 22s"} +{"loss": 0.52878418, "grad_norm": 1.07679001, "learning_rate": 9.256e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.079793, "rewards/chosen": 7.69999981, "rewards/rejected": 0.83886719, "rewards/accuracies": 1.0, "rewards/margins": 6.859375, "logps/chosen": -546.0, "logps/rejected": -647.0, "logits/chosen": -1.65468752, "logits/rejected": -1.61249995, "nll_loss": 0.51835936, "epoch": 0.65789474, "global_step/max_steps": "25/114", "percentage": "21.93%", "elapsed_time": "5m 9s", "remaining_time": "18m 20s"} +{"loss": 
0.47282715, "grad_norm": 0.79799582, "learning_rate": 8.83e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.080036, "rewards/chosen": 8.5, "rewards/rejected": -1.23769534, "rewards/accuracies": 1.0, "rewards/margins": 9.72500038, "logps/chosen": -400.79998779, "logps/rejected": -590.79998779, "logits/chosen": -1.703125, "logits/rejected": -1.65468752, "nll_loss": 0.47226563, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "6m 10s", "remaining_time": "17m 17s"} +{"eval_loss": 0.30297852, "eval_runtime": 4.4097, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 10.0, "eval_rewards/rejected": -0.30078125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.25, "eval_logps/chosen": -140.0, "eval_logps/rejected": -1080.0, "eval_logits/chosen": -1.71875, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.30273438, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "6m 15s", "remaining_time": "17m 30s"} +{"loss": 0.53716431, "grad_norm": 0.44466336, "learning_rate": 8.324e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.079077, "rewards/chosen": 9.13749981, "rewards/rejected": -0.17939453, "rewards/accuracies": 1.0, "rewards/margins": 9.30624962, "logps/chosen": -614.0, "logps/rejected": -618.40002441, "logits/chosen": -1.71875, "logits/rejected": -1.765625, "nll_loss": 0.53515625, "epoch": 0.92105263, "global_step/max_steps": "35/114", "percentage": "30.70%", "elapsed_time": "7m 18s", "remaining_time": "16m 29s"} +{"loss": 0.50817871, "grad_norm": 0.72643977, "learning_rate": 7.748e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.079662, "rewards/chosen": 10.13749981, "rewards/rejected": 0.09643555, "rewards/accuracies": 1.0, "rewards/margins": 10.05000019, "logps/chosen": -474.0, "logps/rejected": -646.40002441, "logits/chosen": -1.68437505, "logits/rejected": -1.73906255, "nll_loss": 0.56367189, "epoch": 1.05263158, 
"global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "8m 17s", "remaining_time": "15m 21s"} +{"eval_loss": 0.29394531, "eval_runtime": 4.4344, "eval_samples_per_second": 0.902, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 10.375, "eval_rewards/rejected": 1.1015625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.3125, "eval_logps/chosen": -136.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7421875, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.29296875, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "8m 22s", "remaining_time": "15m 29s"} +{"loss": 0.42709961, "grad_norm": 0.476179, "learning_rate": 7.113e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.078826, "rewards/chosen": 11.16250038, "rewards/rejected": -0.18632813, "rewards/accuracies": 1.0, "rewards/margins": 11.36250019, "logps/chosen": -440.6000061, "logps/rejected": -664.79998779, "logits/chosen": -1.69218755, "logits/rejected": -1.61093748, "nll_loss": 0.42656249, "epoch": 1.18421053, "global_step/max_steps": "45/114", "percentage": "39.47%", "elapsed_time": "9m 26s", "remaining_time": "14m 28s"} +{"loss": 0.47252808, "grad_norm": 0.43416853, "learning_rate": 6.434e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.079352, "rewards/chosen": 11.875, "rewards/rejected": 0.03691406, "rewards/accuracies": 1.0, "rewards/margins": 11.85000038, "logps/chosen": -489.20001221, "logps/rejected": -612.79998779, "logits/chosen": -1.78125, "logits/rejected": -1.81562495, "nll_loss": 0.47265625, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "10m 25s", "remaining_time": "13m 21s"} +{"eval_loss": 0.28613281, "eval_runtime": 4.3892, "eval_samples_per_second": 0.911, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 10.75, "eval_rewards/rejected": 1.8515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.9375, "eval_logps/chosen": -132.0, 
"eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.8046875, "eval_logits/rejected": -1.3125, "eval_nll_loss": 0.28515625, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "10m 30s", "remaining_time": "13m 26s"} +{"loss": 0.45426636, "grad_norm": 0.15935497, "learning_rate": 5.725e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.079883, "rewards/chosen": 12.75, "rewards/rejected": 0.41757813, "rewards/accuracies": 1.0, "rewards/margins": 12.32499981, "logps/chosen": -466.3999939, "logps/rejected": -571.20001221, "logits/chosen": -1.65625, "logits/rejected": -1.7265625, "nll_loss": 0.45351562, "epoch": 1.44736842, "global_step/max_steps": "55/114", "percentage": "48.25%", "elapsed_time": "11m 24s", "remaining_time": "12m 14s"} +{"loss": 0.41783447, "grad_norm": 0.13916645, "learning_rate": 5e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080545, "rewards/chosen": 13.38749981, "rewards/rejected": 0.296875, "rewards/accuracies": 1.0, "rewards/margins": 13.08749962, "logps/chosen": -506.3999939, "logps/rejected": -442.0, "logits/chosen": -1.65937495, "logits/rejected": -1.6640625, "nll_loss": 0.41796875, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "12m 20s", "remaining_time": "11m 6s"} +{"eval_loss": 0.28076172, "eval_runtime": 4.3677, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.229, "eval_rewards/chosen": 11.0, "eval_rewards/rejected": 2.15625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.875, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.765625, "eval_logits/rejected": -1.3046875, "eval_nll_loss": 0.28125, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "12m 25s", "remaining_time": "11m 10s"} +{"loss": 0.39757996, "grad_norm": 0.24276457, "learning_rate": 4.275e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080487, "rewards/chosen": 
13.23750019, "rewards/rejected": 0.15307617, "rewards/accuracies": 1.0, "rewards/margins": 13.08749962, "logps/chosen": -445.6000061, "logps/rejected": -494.3999939, "logits/chosen": -1.74062502, "logits/rejected": -1.671875, "nll_loss": 0.39726561, "epoch": 1.71052632, "global_step/max_steps": "65/114", "percentage": "57.02%", "elapsed_time": "13m 23s", "remaining_time": "10m 5s"} +{"loss": 0.4327179, "grad_norm": 0.25700141, "learning_rate": 3.566e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080636, "rewards/chosen": 13.39999962, "rewards/rejected": 0.053125, "rewards/accuracies": 1.0, "rewards/margins": 13.33749962, "logps/chosen": -397.20001221, "logps/rejected": -592.40002441, "logits/chosen": -1.69687498, "logits/rejected": -1.70156252, "nll_loss": 0.43203124, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "14m 23s", "remaining_time": "9m 3s"} +{"eval_loss": 0.27856445, "eval_runtime": 4.4055, "eval_samples_per_second": 0.908, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.0, "eval_rewards/rejected": 1.75, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.25, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.27929688, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "14m 28s", "remaining_time": "9m 5s"} +{"loss": 0.46323242, "grad_norm": 0.42633299, "learning_rate": 2.887e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.08049, "rewards/chosen": 13.30000019, "rewards/rejected": -0.25976562, "rewards/accuracies": 1.0, "rewards/margins": 13.5625, "logps/chosen": -497.6000061, "logps/rejected": -637.20001221, "logits/chosen": -1.65312505, "logits/rejected": -1.609375, "nll_loss": 0.46367186, "epoch": 1.97368421, "global_step/max_steps": "75/114", "percentage": "65.79%", "elapsed_time": "15m 27s", "remaining_time": "8m 2s"} +{"loss": 
0.45631104, "grad_norm": 0.34888439, "learning_rate": 2.252e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080085, "rewards/chosen": 13.14999962, "rewards/rejected": -0.61757815, "rewards/accuracies": 1.0, "rewards/margins": 13.76249981, "logps/chosen": -427.20001221, "logps/rejected": -584.79998779, "logits/chosen": -1.72187495, "logits/rejected": -1.64218748, "nll_loss": 0.47304687, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "16m 34s", "remaining_time": "7m 2s"} +{"eval_loss": 0.27880859, "eval_runtime": 4.4925, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.223, "eval_rewards/chosen": 11.0625, "eval_rewards/rejected": 1.25, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.8125, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.27929688, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "16m 39s", "remaining_time": "7m 4s"} +{"loss": 0.39183044, "grad_norm": 0.37174945, "learning_rate": 1.676e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.08009, "rewards/chosen": 13.0625, "rewards/rejected": 0.25874025, "rewards/accuracies": 1.0, "rewards/margins": 12.80000019, "logps/chosen": -380.79998779, "logps/rejected": -698.0, "logits/chosen": -1.64218748, "logits/rejected": -1.63437498, "nll_loss": 0.39101562, "epoch": 2.23684211, "global_step/max_steps": "85/114", "percentage": "74.56%", "elapsed_time": "17m 37s", "remaining_time": "6m 0s"} +{"loss": 0.42437286, "grad_norm": 0.21082413, "learning_rate": 1.17e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.08076, "rewards/chosen": 14.66250038, "rewards/rejected": 0.02832031, "rewards/accuracies": 1.0, "rewards/margins": 14.63749981, "logps/chosen": -481.6000061, "logps/rejected": -547.59997559, "logits/chosen": -1.62343752, "logits/rejected": -1.63125002, "nll_loss": 0.42421874, "epoch": 
2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "18m 30s", "remaining_time": "4m 56s"} +{"eval_loss": 0.27929688, "eval_runtime": 4.3984, "eval_samples_per_second": 0.909, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 1.203125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.9375, "eval_logps/chosen": -129.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.734375, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.27929688, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "18m 34s", "remaining_time": "4m 57s"} +{"loss": 0.44733887, "grad_norm": 0.14903932, "learning_rate": 7.44e-06, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080249, "rewards/chosen": 14.73750019, "rewards/rejected": -0.85624999, "rewards/accuracies": 1.0, "rewards/margins": 15.58749962, "logps/chosen": -511.6000061, "logps/rejected": -677.20001221, "logits/chosen": -1.69218755, "logits/rejected": -1.65937495, "nll_loss": 0.44726562, "epoch": 2.5, "global_step/max_steps": "95/114", "percentage": "83.33%", "elapsed_time": "19m 39s", "remaining_time": "3m 55s"} +{"loss": 0.39319458, "grad_norm": 0.26769718, "learning_rate": 4.09e-06, "memory(GiB)": 66.91, "train_speed(iter/s)": 0.080703, "rewards/chosen": 13.14999962, "rewards/rejected": -0.92285156, "rewards/accuracies": 1.0, "rewards/margins": 14.0625, "logps/chosen": -397.0, "logps/rejected": -521.59997559, "logits/chosen": -1.67812502, "logits/rejected": -1.63906252, "nll_loss": 0.39335936, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "20m 34s", "remaining_time": "2m 52s"} +{"eval_loss": 0.27905273, "eval_runtime": 4.4148, "eval_samples_per_second": 0.906, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 1.203125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.9375, "eval_logps/chosen": -129.0, 
"eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.27929688, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "20m 39s", "remaining_time": "2m 53s"} +{"loss": 0.41911621, "grad_norm": 0.42075165, "learning_rate": 1.7e-06, "memory(GiB)": 66.91, "train_speed(iter/s)": 0.080527, "rewards/chosen": 13.76249981, "rewards/rejected": -0.4705078, "rewards/accuracies": 1.0, "rewards/margins": 14.22500038, "logps/chosen": -429.6499939, "logps/rejected": -526.40002441, "logits/chosen": -1.66875005, "logits/rejected": -1.70624995, "nll_loss": 0.41874999, "epoch": 2.76315789, "global_step/max_steps": "105/114", "percentage": "92.11%", "elapsed_time": "21m 39s", "remaining_time": "1m 51s"} +{"loss": 0.38078003, "grad_norm": 0.3068828, "learning_rate": 3.4e-07, "memory(GiB)": 66.91, "train_speed(iter/s)": 0.0807, "rewards/chosen": 14.35000038, "rewards/rejected": -0.80195314, "rewards/accuracies": 1.0, "rewards/margins": 15.16250038, "logps/chosen": -446.79998779, "logps/rejected": -566.40002441, "logits/chosen": -1.69375002, "logits/rejected": -1.6875, "nll_loss": 0.38066405, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "22m 38s", "remaining_time": "49s"} +{"eval_loss": 0.27880859, "eval_runtime": 4.4089, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.125, "eval_logps/chosen": -129.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27929688, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "22m 43s", "remaining_time": "49s"} +{"eval_loss": 0.27758789, "eval_runtime": 4.3398, "eval_samples_per_second": 0.922, "eval_steps_per_second": 0.23, 
"eval_rewards/chosen": 11.1875, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.1875, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.734375, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "23m 43s", "remaining_time": "0s"} +{"train_runtime": 1425.9789, "train_samples_per_second": 0.627, "train_steps_per_second": 0.08, "total_flos": 479640253628416.0, "train_loss": 0.61656048, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "23m 45s", "remaining_time": "0s"} +{"train_dataset": "1698.815436±897.000106, min=182.000000, max=4081.000000, size=298", "val_dataset": "1637.250000±797.581461, min=755.000000, max=2485.000000, size=4", "model_parameter_info": "PeftModelForCausalLM: 32830.9852M Params (67.1089M Trainable [0.2044%]), 0.0001M Buffers.", "last_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114", "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/checkpoint-114", "best_metric": 0.27758789, "global_step": 114, "log_history": [{"loss": 1.2783203125, "grad_norm": 4.7678369137832615, "learning_rate": 1.6666666666666667e-05, "memory(GiB)": 14.31, "train_speed(iter/s)": 0.061081, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -664.0, "logps/rejected": -370.0, "logits/chosen": -1.609375, "logits/rejected": -1.609375, "nll_loss": 0.5859375, "epoch": 0.02631578947368421, "step": 1}, {"loss": 1.84375, "grad_norm": 5.77482904203973, "learning_rate": 8.333333333333334e-05, "memory(GiB)": 24.25, "train_speed(iter/s)": 0.081107, "rewards/chosen": 0.0674591064453125, 
"rewards/rejected": -0.01410675048828125, "rewards/accuracies": 0.3125, "rewards/margins": 0.08127593994140625, "logps/chosen": -676.25, "logps/rejected": -514.0, "logits/chosen": -1.57421875, "logits/rejected": -1.63671875, "nll_loss": 1.1826171875, "epoch": 0.13157894736842105, "step": 5}, {"loss": 1.8857421875, "grad_norm": 4.776080083637143, "learning_rate": 9.966191788709716e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.078746, "rewards/chosen": 0.605664074420929, "rewards/rejected": 0.25761717557907104, "rewards/accuracies": 0.699999988079071, "rewards/margins": 0.3480468690395355, "logps/chosen": -573.2000122070312, "logps/rejected": -644.0, "logits/chosen": -1.506250023841858, "logits/rejected": -1.532812476158142, "nll_loss": 1.321874976158142, "epoch": 0.2631578947368421, "step": 10}, {"eval_loss": 0.7958984375, "eval_runtime": 4.4381, "eval_samples_per_second": 0.901, "eval_steps_per_second": 0.225, "eval_rewards/chosen": 2.703125, "eval_rewards/rejected": 1.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 1.3515625, "eval_logps/chosen": -213.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.6171875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.55859375, "epoch": 0.2631578947368421, "step": 10}, {"loss": 1.065966796875, "grad_norm": 2.4823344294382115, "learning_rate": 9.829629131445342e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.080005, "rewards/chosen": 3.328125, "rewards/rejected": 1.462499976158142, "rewards/accuracies": 0.875, "rewards/margins": 1.8703124523162842, "logps/chosen": -602.0, "logps/rejected": -516.7999877929688, "logits/chosen": -1.579687476158142, "logits/rejected": -1.6171875, "nll_loss": 0.7718750238418579, "epoch": 0.39473684210526316, "step": 15}, {"loss": 0.796923828125, "grad_norm": 0.7039423711093902, "learning_rate": 9.591080534401371e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.08091, "rewards/chosen": 6.800000190734863, "rewards/rejected": 3.6031250953674316, 
"rewards/accuracies": 0.9750000238418579, "rewards/margins": 3.1968750953674316, "logps/chosen": -375.20001220703125, "logps/rejected": -528.0, "logits/chosen": -1.6484375, "logits/rejected": -1.6218750476837158, "nll_loss": 0.6761718988418579, "epoch": 0.5263157894736842, "step": 20}, {"eval_loss": 0.3564453125, "eval_runtime": 4.4028, "eval_samples_per_second": 0.909, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 9.125, "eval_rewards/rejected": 4.90625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 4.21875, "eval_logps/chosen": -149.0, "eval_logps/rejected": -1024.0, "eval_logits/chosen": -1.6875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.328125, "epoch": 0.5263157894736842, "step": 20}, {"loss": 0.5287841796875, "grad_norm": 1.076790008093808, "learning_rate": 9.255583362184999e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.079793, "rewards/chosen": 7.699999809265137, "rewards/rejected": 0.8388671875, "rewards/accuracies": 1.0, "rewards/margins": 6.859375, "logps/chosen": -546.0, "logps/rejected": -647.0, "logits/chosen": -1.654687523841858, "logits/rejected": -1.6124999523162842, "nll_loss": 0.518359363079071, "epoch": 0.6578947368421053, "step": 25}, {"loss": 0.4728271484375, "grad_norm": 0.797995818648193, "learning_rate": 8.83022221559489e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.080036, "rewards/chosen": 8.5, "rewards/rejected": -1.237695336341858, "rewards/accuracies": 1.0, "rewards/margins": 9.725000381469727, "logps/chosen": -400.79998779296875, "logps/rejected": -590.7999877929688, "logits/chosen": -1.703125, "logits/rejected": -1.654687523841858, "nll_loss": 0.4722656309604645, "epoch": 0.7894736842105263, "step": 30}, {"eval_loss": 0.302978515625, "eval_runtime": 4.4097, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 10.0, "eval_rewards/rejected": -0.30078125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.25, "eval_logps/chosen": -140.0, 
"eval_logps/rejected": -1080.0, "eval_logits/chosen": -1.71875, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.302734375, "epoch": 0.7894736842105263, "step": 30}, {"loss": 0.537164306640625, "grad_norm": 0.4446633571773274, "learning_rate": 8.323979328069689e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.079077, "rewards/chosen": 9.137499809265137, "rewards/rejected": -0.17939452826976776, "rewards/accuracies": 1.0, "rewards/margins": 9.306249618530273, "logps/chosen": -614.0, "logps/rejected": -618.4000244140625, "logits/chosen": -1.71875, "logits/rejected": -1.765625, "nll_loss": 0.53515625, "epoch": 0.9210526315789473, "step": 35}, {"loss": 0.5081787109375, "grad_norm": 0.7264397691155322, "learning_rate": 7.74754489035403e-05, "memory(GiB)": 56.21, "train_speed(iter/s)": 0.079662, "rewards/chosen": 10.137499809265137, "rewards/rejected": 0.096435546875, "rewards/accuracies": 1.0, "rewards/margins": 10.050000190734863, "logps/chosen": -474.0, "logps/rejected": -646.4000244140625, "logits/chosen": -1.6843750476837158, "logits/rejected": -1.7390625476837158, "nll_loss": 0.563671886920929, "epoch": 1.0526315789473684, "step": 40}, {"eval_loss": 0.2939453125, "eval_runtime": 4.4344, "eval_samples_per_second": 0.902, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 10.375, "eval_rewards/rejected": 1.1015625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.3125, "eval_logps/chosen": -136.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7421875, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.29296875, "epoch": 1.0526315789473684, "step": 40}, {"loss": 0.427099609375, "grad_norm": 0.4761790027876643, "learning_rate": 7.113091308703498e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.078826, "rewards/chosen": 11.162500381469727, "rewards/rejected": -0.18632812798023224, "rewards/accuracies": 1.0, "rewards/margins": 11.362500190734863, "logps/chosen": -440.6000061035156, "logps/rejected": -664.7999877929688, 
"logits/chosen": -1.6921875476837158, "logits/rejected": -1.610937476158142, "nll_loss": 0.42656248807907104, "epoch": 1.1842105263157894, "step": 45}, {"loss": 0.472528076171875, "grad_norm": 0.43416852815625473, "learning_rate": 6.434016163555452e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.079352, "rewards/chosen": 11.875, "rewards/rejected": 0.03691406175494194, "rewards/accuracies": 1.0, "rewards/margins": 11.850000381469727, "logps/chosen": -489.20001220703125, "logps/rejected": -612.7999877929688, "logits/chosen": -1.78125, "logits/rejected": -1.8156249523162842, "nll_loss": 0.47265625, "epoch": 1.3157894736842106, "step": 50}, {"eval_loss": 0.2861328125, "eval_runtime": 4.3892, "eval_samples_per_second": 0.911, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 10.75, "eval_rewards/rejected": 1.8515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.9375, "eval_logps/chosen": -132.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.8046875, "eval_logits/rejected": -1.3125, "eval_nll_loss": 0.28515625, "epoch": 1.3157894736842106, "step": 50}, {"loss": 0.454266357421875, "grad_norm": 0.159354970669322, "learning_rate": 5.724659296536233e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.079883, "rewards/chosen": 12.75, "rewards/rejected": 0.4175781309604645, "rewards/accuracies": 1.0, "rewards/margins": 12.324999809265137, "logps/chosen": -466.3999938964844, "logps/rejected": -571.2000122070312, "logits/chosen": -1.65625, "logits/rejected": -1.7265625, "nll_loss": 0.4535156190395355, "epoch": 1.4473684210526316, "step": 55}, {"loss": 0.41783447265625, "grad_norm": 0.13916645187817775, "learning_rate": 5e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080545, "rewards/chosen": 13.387499809265137, "rewards/rejected": 0.296875, "rewards/accuracies": 1.0, "rewards/margins": 13.087499618530273, "logps/chosen": -506.3999938964844, "logps/rejected": -442.0, "logits/chosen": -1.6593749523162842, "logits/rejected": -1.6640625, 
"nll_loss": 0.41796875, "epoch": 1.5789473684210527, "step": 60}, {"eval_loss": 0.28076171875, "eval_runtime": 4.3677, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.229, "eval_rewards/chosen": 11.0, "eval_rewards/rejected": 2.15625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.875, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.765625, "eval_logits/rejected": -1.3046875, "eval_nll_loss": 0.28125, "epoch": 1.5789473684210527, "step": 60}, {"loss": 0.3975799560546875, "grad_norm": 0.24276457284222902, "learning_rate": 4.275340703463767e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080487, "rewards/chosen": 13.237500190734863, "rewards/rejected": 0.153076171875, "rewards/accuracies": 1.0, "rewards/margins": 13.087499618530273, "logps/chosen": -445.6000061035156, "logps/rejected": -494.3999938964844, "logits/chosen": -1.740625023841858, "logits/rejected": -1.671875, "nll_loss": 0.39726561307907104, "epoch": 1.7105263157894737, "step": 65}, {"loss": 0.4327178955078125, "grad_norm": 0.2570014131550182, "learning_rate": 3.5659838364445505e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080636, "rewards/chosen": 13.399999618530273, "rewards/rejected": 0.05312500149011612, "rewards/accuracies": 1.0, "rewards/margins": 13.337499618530273, "logps/chosen": -397.20001220703125, "logps/rejected": -592.4000244140625, "logits/chosen": -1.696874976158142, "logits/rejected": -1.701562523841858, "nll_loss": 0.4320312440395355, "epoch": 1.8421052631578947, "step": 70}, {"eval_loss": 0.278564453125, "eval_runtime": 4.4055, "eval_samples_per_second": 0.908, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.0, "eval_rewards/rejected": 1.75, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.25, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.279296875, "epoch": 1.8421052631578947, "step": 70}, 
{"loss": 0.463232421875, "grad_norm": 0.42633298601984926, "learning_rate": 2.886908691296504e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.08049, "rewards/chosen": 13.300000190734863, "rewards/rejected": -0.259765625, "rewards/accuracies": 1.0, "rewards/margins": 13.5625, "logps/chosen": -497.6000061035156, "logps/rejected": -637.2000122070312, "logits/chosen": -1.6531250476837158, "logits/rejected": -1.609375, "nll_loss": 0.46367186307907104, "epoch": 1.973684210526316, "step": 75}, {"loss": 0.45631103515625, "grad_norm": 0.3488843908324041, "learning_rate": 2.25245510964597e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080085, "rewards/chosen": 13.149999618530273, "rewards/rejected": -0.6175781488418579, "rewards/accuracies": 1.0, "rewards/margins": 13.762499809265137, "logps/chosen": -427.20001220703125, "logps/rejected": -584.7999877929688, "logits/chosen": -1.7218749523162842, "logits/rejected": -1.642187476158142, "nll_loss": 0.4730468690395355, "epoch": 2.1052631578947367, "step": 80}, {"eval_loss": 0.27880859375, "eval_runtime": 4.4925, "eval_samples_per_second": 0.89, "eval_steps_per_second": 0.223, "eval_rewards/chosen": 11.0625, "eval_rewards/rejected": 1.25, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.8125, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.279296875, "epoch": 2.1052631578947367, "step": 80}, {"loss": 0.3918304443359375, "grad_norm": 0.37174945064044734, "learning_rate": 1.6760206719303105e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.08009, "rewards/chosen": 13.0625, "rewards/rejected": 0.25874024629592896, "rewards/accuracies": 1.0, "rewards/margins": 12.800000190734863, "logps/chosen": -380.79998779296875, "logps/rejected": -698.0, "logits/chosen": -1.642187476158142, "logits/rejected": -1.634374976158142, "nll_loss": 0.3910156190395355, "epoch": 2.236842105263158, "step": 85}, {"loss": 
0.42437286376953126, "grad_norm": 0.21082412866946396, "learning_rate": 1.1697777844051105e-05, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.08076, "rewards/chosen": 14.662500381469727, "rewards/rejected": 0.0283203125, "rewards/accuracies": 1.0, "rewards/margins": 14.637499809265137, "logps/chosen": -481.6000061035156, "logps/rejected": -547.5999755859375, "logits/chosen": -1.623437523841858, "logits/rejected": -1.631250023841858, "nll_loss": 0.4242187440395355, "epoch": 2.3684210526315788, "step": 90}, {"eval_loss": 0.279296875, "eval_runtime": 4.3984, "eval_samples_per_second": 0.909, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 1.203125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.9375, "eval_logps/chosen": -129.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.734375, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.279296875, "epoch": 2.3684210526315788, "step": 90}, {"loss": 0.4473388671875, "grad_norm": 0.14903932297946823, "learning_rate": 7.444166378150013e-06, "memory(GiB)": 57.66, "train_speed(iter/s)": 0.080249, "rewards/chosen": 14.737500190734863, "rewards/rejected": -0.856249988079071, "rewards/accuracies": 1.0, "rewards/margins": 15.587499618530273, "logps/chosen": -511.6000061035156, "logps/rejected": -677.2000122070312, "logits/chosen": -1.6921875476837158, "logits/rejected": -1.6593749523162842, "nll_loss": 0.447265625, "epoch": 2.5, "step": 95}, {"loss": 0.393194580078125, "grad_norm": 0.2676971755335469, "learning_rate": 4.089194655986306e-06, "memory(GiB)": 66.91, "train_speed(iter/s)": 0.080703, "rewards/chosen": 13.149999618530273, "rewards/rejected": -0.9228515625, "rewards/accuracies": 1.0, "rewards/margins": 14.0625, "logps/chosen": -397.0, "logps/rejected": -521.5999755859375, "logits/chosen": -1.678125023841858, "logits/rejected": -1.639062523841858, "nll_loss": 0.39335936307907104, "epoch": 2.6315789473684212, "step": 100}, {"eval_loss": 0.279052734375, 
"eval_runtime": 4.4148, "eval_samples_per_second": 0.906, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 1.203125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.9375, "eval_logps/chosen": -129.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.279296875, "epoch": 2.6315789473684212, "step": 100}, {"loss": 0.4191162109375, "grad_norm": 0.4207516520255958, "learning_rate": 1.70370868554659e-06, "memory(GiB)": 66.91, "train_speed(iter/s)": 0.080527, "rewards/chosen": 13.762499809265137, "rewards/rejected": -0.47050780057907104, "rewards/accuracies": 1.0, "rewards/margins": 14.225000381469727, "logps/chosen": -429.6499938964844, "logps/rejected": -526.4000244140625, "logits/chosen": -1.6687500476837158, "logits/rejected": -1.7062499523162842, "nll_loss": 0.41874998807907104, "epoch": 2.763157894736842, "step": 105}, {"loss": 0.380780029296875, "grad_norm": 0.3068828002596679, "learning_rate": 3.380821129028489e-07, "memory(GiB)": 66.91, "train_speed(iter/s)": 0.0807, "rewards/chosen": 14.350000381469727, "rewards/rejected": -0.801953136920929, "rewards/accuracies": 1.0, "rewards/margins": 15.162500381469727, "logps/chosen": -446.79998779296875, "logps/rejected": -566.4000244140625, "logits/chosen": -1.693750023841858, "logits/rejected": -1.6875, "nll_loss": 0.38066405057907104, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.27880859375, "eval_runtime": 4.4089, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.125, "eval_logps/chosen": -129.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.279296875, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.277587890625, "eval_runtime": 4.3398, "eval_samples_per_second": 0.922, 
"eval_steps_per_second": 0.23, "eval_rewards/chosen": 11.1875, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.1875, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.734375, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 3.0, "step": 114}, {"train_runtime": 1425.9789, "train_samples_per_second": 0.627, "train_steps_per_second": 0.08, "total_flos": 479640253628416.0, "train_loss": 0.6165604842336554, "epoch": 3.0, "step": 114}], "memory": 66.912109375} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs/events.out.tfevents.1739620634.kml-task-540432-record-10144729-prod-worker-0.3420.0 b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs/events.out.tfevents.1739620634.kml-task-540432-record-10144729-prod-worker-0.3420.0 new file mode 100644 index 0000000000000000000000000000000000000000..3143bc4626303a1f757cd26f39278ad159dece16 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-115532/runs/events.out.tfevents.1739620634.kml-task-540432-record-10144729-prod-worker-0.3420.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251a69119862399f0229c7df6f3526bb5515f9a8ce39fc22a25e4864894e334c +size 36881 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/args.json new file mode 100644 index 0000000000000000000000000000000000000000..116251445760e59feb4fe2e3015344d18d3f7253 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", 
+ "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": 
false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + 
"fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + 
"push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + 
"galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, 
config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + "training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, 
per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, 
gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, 
include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by 
[optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37fbac01e884b4737b62d9f8690242edcb865230 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f1512fa3de358a8198dbbcfb2887691b7baa241 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e027df868fa546c05790ae770b8fccf6a8712e478b2fce68a37bbe2d3ab60e83 +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..116251445760e59feb4fe2e3015344d18d3f7253 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c1b4e542f92d25c560ef590dab91efff7b63652 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce0773476c0ac02dacfc996fac6493181889660e03c0d26f87f51b72353d202 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..239cf925485c6768b34584f7940ce51294a631ff --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f209496d1b2cdcc9f3284babdd0568223835dc82da022283939ffa4245dd3661 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca7e97bb701b2146431445c9cf07f2f6863d7ad1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a45a4c057351f28ea3ea3d37fa9ae1d7d52a7335bff6688ea72ca1938634d6 +size 100667312 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac9dd78faaeb0941c9f5377b1574d7ee652e948a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c38e8d84b91a5a0763bd37512dd8ca171bf88155a6ae16520c3f4edcb724175 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..990e5155cee409df97840913084fb96f8e913811 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e2c3b4c169145aa72f3d4ef591107603503578297ad78498d3e99e3055d5e5 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..802aff866a83b3a297f1df01f481d93c4ab557de --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530323a336584330acd7472711e6c7f976c1927244a3de72c9042a91490876a7 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a244a1466ddd18803e6ed85a3689c044fe8701b9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b430d47f51d4d63ddcd17857c4521b7d1b3bac1c55c7925a5cb18b1bc3c0fb3 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08766eb3cc1411b474264aedc3e277de0172edae --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c313a92c00fae9f515fb345e53c023132a4a80977e60eadcf4f5fbb8d069baa8 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c7ae4432113ad4b11cf31e67d4c4a37f9f89937 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddfc67b3a517ef961c3b5705cc7acf2914eb49d3c4dc2992884fa09449846b3e +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4be062e0eceb3e9ac20c1d41d4b0cf14db42ac9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4df242d6635400e57556b99edee63cfff3a968ca0a2755b2ceef1f9012b05e +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b337383b9f00724e70df2259eb88dce09a19e1c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00564d9858ab14b7a140837c454f065a2420dc403617f17cb8a2e5118cd86eaa +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b9ba5870d8bb67867bda6c28f98a0ed4dbc9ca0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329d6ca2ce6026f0136c01384891858b0084fc96c796d6096100f085374bb838 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..92251b57cb07b3bbffb2cf4e89796f689851c1bb --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee250f073179786701924268a7356463dcb5ab4bb216a0319d44c81b37259f3 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b940ead0174e2c9a9d429788757291a89c76828b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642dc60def74ca84e35dfdf6296aa8289a65944e65a892aff15bf6e6547a71e0 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ad47e5822c97d26058f7cd02f16ce9d56ce270f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67b4b0b03435aab670516d7b779bd2586af114df30a200bc2fd9fa37b993aec +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..508375eb6c9c97918bb0d80d1c9da8d7c15173c1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbfc7e2779ddf3c8f005358467f333da4c7d239138fd358ac7f6095abfcd63d1 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/latest new file mode 100644 index 0000000000000000000000000000000000000000..744ae7dbad571b6f37ec6c7066549494261bb59e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/latest @@ -0,0 +1 @@ +global_step100 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..97f51b498d48145bd9cc14b35f8236b9ec95a4f7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4 +size 
15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08e59ac81067b262a084604cd3392250166c2841 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..20a24c17b4be2ee59cd5e6682010519318a91e58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..54050f6cf8fb847e2a926e14a7aad2647761521a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..263aae475c49b090bce43f143308192c5bf9a95b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..942ed5d60ae87dce686b33da76a34db404036dc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..57789be3df3983cb8acc1500bf6470ffadb1c578 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32d6e2e7eb7148713b473b0c821a98e616ab6e6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a1fb08c48e9d34df783eb19e7c9d1caf0ed386 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec37c3a15b8d061312402391f2fddb52d623a1416d6d2879a30f184450d844f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..199929265d7b9b78a22abf8697c488d0925c12f4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/trainer_state.json @@ -0,0 +1,581 @@ 
+{ + "best_metric": 0.27758789, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90", + "epoch": 2.6315789473684212, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.767905865350689, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.059163 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.720924346118727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.640625, + "logps/chosen": -676.25, + "logps/rejected": -513.5, + "loss": 1.839111328125, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.082916259765625, + "rewards/margins": 0.09368896484375, + "rewards/rejected": -0.0110015869140625, + "step": 5, + "train_speed(iter/s)": 0.080606 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.536016569461848, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5046875476837158, + "logits/rejected": -1.5343749523162842, + "logps/chosen": -572.0, + "logps/rejected": -645.5999755859375, + "loss": 1.83935546875, + "memory(GiB)": 56.43, + "nll_loss": 1.3250000476837158, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.6597656011581421, + "rewards/margins": 0.4574218690395355, + "rewards/rejected": 0.20273438096046448, + "step": 10, + "train_speed(iter/s)": 0.078543 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": 
-1.6171875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.77734375, + "eval_nll_loss": 0.5546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.75, + "eval_rewards/margins": 1.3984375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.3928, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.6231857601470785, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.618749976158142, + "logps/chosen": -600.0, + "logps/rejected": -515.5999755859375, + "loss": 1.06064453125, + "memory(GiB)": 56.43, + "nll_loss": 0.7710937261581421, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.4437499046325684, + "rewards/margins": 1.9249999523162842, + "rewards/rejected": 1.5187499523162842, + "step": 15, + "train_speed(iter/s)": 0.07979 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7033851800529384, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.623437523841858, + "logps/chosen": -374.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.7858154296875, + "memory(GiB)": 56.43, + "nll_loss": 0.673046886920929, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.862500190734863, + "rewards/margins": 3.2562499046325684, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.080777 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3515625, + "eval_nll_loss": 0.32421875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.25, + "eval_rewards/margins": 4.375, + "eval_rewards/rejected": 4.84375, + "eval_runtime": 4.3823, + "eval_samples_per_second": 0.913, + 
"eval_steps_per_second": 0.228, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.1907563826066778, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -543.5999755859375, + "logps/rejected": -643.7999877929688, + "loss": 0.5309295654296875, + "memory(GiB)": 56.43, + "nll_loss": 0.516406238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.918749809265137, + "rewards/margins": 6.724999904632568, + "rewards/rejected": 1.191796898841858, + "step": 25, + "train_speed(iter/s)": 0.07974 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.8601038031625003, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -400.79998779296875, + "logps/rejected": -584.4000244140625, + "loss": 0.47745361328125, + "memory(GiB)": 56.43, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5625, + "rewards/margins": 9.362500190734863, + "rewards/rejected": -0.774609386920929, + "step": 30, + "train_speed(iter/s)": 0.080007 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -141.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.3046875, + "eval_nll_loss": 0.3046875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.9375, + "eval_rewards/margins": 9.6875, + "eval_rewards/rejected": 0.25, + "eval_runtime": 4.4114, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.43138365725560973, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7312500476837158, + "logits/rejected": -1.7765624523162842, + "logps/chosen": -612.2000122070312, + "logps/rejected": -616.0, + "loss": 0.53447265625, + "memory(GiB)": 56.43, + "nll_loss": 0.5328124761581421, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 9.399999618530273, + "rewards/margins": 9.149999618530273, + "rewards/rejected": 0.22822265326976776, + "step": 35, + "train_speed(iter/s)": 0.079054 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.4899916200761263, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.759374976158142, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.506884765625, + "memory(GiB)": 56.43, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.25, + "rewards/margins": 10.175000190734863, + "rewards/rejected": 0.07539062201976776, + "step": 40, + "train_speed(iter/s)": 0.079664 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.75, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -135.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.291259765625, + "eval_nll_loss": 0.291015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.5, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3597, + "eval_samples_per_second": 0.917, + "eval_steps_per_second": 0.229, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.5474321289483255, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -439.6000061035156, + "logps/rejected": -661.2000122070312, + "loss": 0.425408935546875, + "memory(GiB)": 57.88, + "nll_loss": 0.4253906309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.274999618530273, + "rewards/margins": 11.25, + "rewards/rejected": 0.04960937425494194, + "step": 45, + "train_speed(iter/s)": 0.078852 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4769138301102208, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.7843749523162842, + "logits/rejected": -1.818750023841858, + "logps/chosen": -488.79998779296875, + 
"logps/rejected": -612.4000244140625, + "loss": 0.4717376708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4710937440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.949999809265137, + "rewards/margins": 11.824999809265137, + "rewards/rejected": 0.13237304985523224, + "step": 50, + "train_speed(iter/s)": 0.079395 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.78125, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -131.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.283935546875, + "eval_nll_loss": 0.283203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.875, + "eval_rewards/margins": 8.75, + "eval_rewards/rejected": 2.109375, + "eval_runtime": 4.4202, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.16456472919965845, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.732812523841858, + "logps/chosen": -465.6000061035156, + "logps/rejected": -570.4000244140625, + "loss": 0.45333251953125, + "memory(GiB)": 57.88, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.800000190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 0.517578125, + "step": 55, + "train_speed(iter/s)": 0.079928 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13988896145691967, + "learning_rate": 5e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -504.3999938964844, + "logps/rejected": -441.6000061035156, + "loss": 0.41478271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.4144531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.5625, + "rewards/margins": 13.162500381469727, + "rewards/rejected": 0.392578125, + "step": 60, + "train_speed(iter/s)": 0.080587 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.296875, 
+ "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1048.0, + "eval_loss": 0.282958984375, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 2.5, + "eval_runtime": 4.3947, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.26327409929500534, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.6953125, + "logits/rejected": -1.671875, + "logps/chosen": -445.3999938964844, + "logps/rejected": -494.0, + "loss": 0.402728271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.40156251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.25, + "rewards/margins": 13.037500381469727, + "rewards/rejected": 0.18845824897289276, + "step": 65, + "train_speed(iter/s)": 0.08052 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2544494877535854, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.6796875, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.3999938964844, + "logps/rejected": -596.0, + "loss": 0.433489990234375, + "memory(GiB)": 57.88, + "nll_loss": 0.43281251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.425000190734863, + "rewards/margins": 13.75, + "rewards/rejected": -0.3185058534145355, + "step": 70, + "train_speed(iter/s)": 0.080662 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.6953125, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 10.375, + "eval_rewards/rejected": 0.703125, + "eval_runtime": 4.4096, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.4107814395722938, + "learning_rate": 
2.886908691296504e-05, + "logits/chosen": -1.6296875476837158, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -498.3999938964844, + "logps/rejected": -641.2000122070312, + "loss": 0.4626708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4625000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 14.0, + "rewards/rejected": -0.741406261920929, + "step": 75, + "train_speed(iter/s)": 0.080513 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3636613929853374, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.712499976158142, + "logits/rejected": -1.649999976158142, + "logps/chosen": -427.6000061035156, + "logps/rejected": -590.7999877929688, + "loss": 0.4591552734375, + "memory(GiB)": 57.88, + "nll_loss": 0.4769531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.100000381469727, + "rewards/margins": 14.237500190734863, + "rewards/rejected": -1.1325194835662842, + "step": 80, + "train_speed(iter/s)": 0.080112 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 10.6875, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.43974717734245733, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.618749976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -380.3999938964844, + "logps/rejected": -703.2000122070312, + "loss": 0.3943389892578125, + "memory(GiB)": 57.88, + "nll_loss": 0.3941406309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.050000190734863, + "rewards/margins": 13.350000381469727, + "rewards/rejected": 
-0.31855469942092896, + "step": 85, + "train_speed(iter/s)": 0.080127 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21291018894177471, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.603124976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -482.79998779296875, + "logps/rejected": -551.5999755859375, + "loss": 0.42427978515625, + "memory(GiB)": 57.88, + "nll_loss": 0.42460936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.637499809265137, + "rewards/margins": 14.949999809265137, + "rewards/rejected": -0.3154296875, + "step": 90, + "train_speed(iter/s)": 0.080807 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.277587890625, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.75, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3944, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.14794669988653775, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.6640625, + "logps/chosen": -510.79998779296875, + "logps/rejected": -682.0, + "loss": 0.445965576171875, + "memory(GiB)": 57.88, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.787500381469727, + "rewards/margins": 16.237499237060547, + "rewards/rejected": -1.454687476158142, + "step": 95, + "train_speed(iter/s)": 0.080283 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.26486420620313644, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.6328125, + "logits/rejected": -1.6328125, + "logps/chosen": -397.3999938964844, + "logps/rejected": -527.2000122070312, + "loss": 0.393603515625, + "memory(GiB)": 67.13, + "nll_loss": 0.3935546875, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 13.162500381469727, + "rewards/margins": 14.725000381469727, + "rewards/rejected": -1.5671875476837158, + "step": 100, + "train_speed(iter/s)": 0.080739 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.27783203125, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.8125, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.4191, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 421598153670656.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b1fa183495283af14f6d8d5fb1ff6c44483b8cb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d03b97c8a90b75abcac34667608460f42b83e53dabd6e225b8ee6aa53d0d020 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-100/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37fbac01e884b4737b62d9f8690242edcb865230 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1bdb92217dc42432819d9ad547f302caee5f5147 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5215fb1738fd8bd25a70178f9fa8821853bb5deba3effe8b3715bbcfa5985ab2 +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..116251445760e59feb4fe2e3015344d18d3f7253 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..df793b4c9ef79cf9488ad18f1e3660595818bbf0 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40da59d749509923fcda7d505fa60c52123b80c214530225294ca61052771709 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d7f972cca5279d3e986a4495f25f580f20afe68 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4b1def04a9ca725f0f7ace8e0974f518c464670474e4b51f5bb2d689f4b54d +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a616369392accc81372f8a6f0ec184c0d4f472eb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d08dbd83d7ba3de45ce2724252cddf1981f54968cf36fc9ad339f051680f9ef +size 100667312 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf6492244f3585e33950908892804174cd5d51f8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be3d00c500bf09c1dcfdc99528f34324001aa1c83bbbe92ebdc1b62ff5a7da9 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fb7d5a89b8bc9d344e8843711c7a70d7d6173ee --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf2578149454e50a90e35d2c4c17dbd90c77f81ac3d1b5da38816cc02b4bfe9 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e35c499fc67d7876cb8570bfaec68d29ccc62ce5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c587c1cfef44882878b9bfd91f931f05b1b0bac18e4fa883f72c85d94da2c98 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e888b0287da8ee47bcab892940171b2f99fb4234 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42ef35645b9bd3034696bd11d1cc87a9fc1b07aa6d3861c48e5181fa5fc0b705 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f77e74be8e0df1553bce6f872736200639696908 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bb1d68d1242718e431ae68bead92a4d2191c7a595d0d55d5d77a1d2e735e0701 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..86889b48637c1519700dcf392c5fd4250c396276 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7011aa79e9f3e47f5797e1231a7fceb11e3af7590251ae3883ad9e4abfdb2181 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d73b06a1459664f42de4ff2681f86a5050908335 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fed471a63452cc896eb8a12991a31cc24a71ca531f75e6e00532b17402da52 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97dce84c82cccb211f43125e88fc249a2e10eef9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a28e6722f8e1e9486e98ad2f4d265dedb149dcd5b03e8e36b27b87b614ab65 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa9fa451d8acd14713d9b35ae023bab84f6b3dd9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed22d973ae09d40e9ac5284fda7feeff1b1fea69e729bb2874f8c54f3af8a23 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..909d392b69bc3711f01dd3727ef923fb6608d1be --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76d28c1d0c5e9c7c0592fe6da9c505fc143cb23c4f5b635cb7fa969ae793eec +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a152dafa7e8732de84860fddffdfcfa43b56b4a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a53510eb271a5b9d986660a3310fb54fe0e1d414524dfd3a102a800b573088 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3aa888112024f48d86a6928a0293d77a4b044811 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ebd528e13be2e27395632c38ff3b4d851cde3c253a572646b7294c5299a1f4d +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae76bf3d1aa7b2c4c127fecbef75b7c0ae52c87e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1387441b883b4a131fc274f85a4162f8484343f74c089ce6dac8df096333add7 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/latest new file mode 100644 index 0000000000000000000000000000000000000000..a9a22a69382a7711ca9e8ab6945c6d2cc8984927 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/latest @@ -0,0 +1 @@ +global_step110 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..584f4a4a43f100f35696d7314a633631af587f25 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee +size 
15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..05b027a867e5e9cebd446293ecff82cfb240cc76 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..af98f0dfe2a5d89fbccf90df58246a0b078c7016 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..715aa4a4ee3915f810fc2bacb2153eb8a0913781 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7bde70899833455b6ee4a99aff9388abc5ffe92 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..90cdeaa2fe438098e9d95ddbc06c765e51af1e78 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bd30529614c5be239cd9477af6bef0e313740b6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_6.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bed311094effd49cc2c89237c675f56eade157d1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38b1a376e7c81e3c533cf8a69ddf4eefa9d1336c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0767a9fe84680a5a8a76633a443cb301092115c026c1f5f7f1fbdc53dd7f856f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3c7cdeb864ed93558ed36c9895d2adfe0566be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/trainer_state.json @@ -0,0 +1,634 @@ 
+{ + "best_metric": 0.27758789, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90", + "epoch": 2.8947368421052633, + "eval_steps": 10, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.767905865350689, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.059163 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.720924346118727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.640625, + "logps/chosen": -676.25, + "logps/rejected": -513.5, + "loss": 1.839111328125, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.082916259765625, + "rewards/margins": 0.09368896484375, + "rewards/rejected": -0.0110015869140625, + "step": 5, + "train_speed(iter/s)": 0.080606 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.536016569461848, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5046875476837158, + "logits/rejected": -1.5343749523162842, + "logps/chosen": -572.0, + "logps/rejected": -645.5999755859375, + "loss": 1.83935546875, + "memory(GiB)": 56.43, + "nll_loss": 1.3250000476837158, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.6597656011581421, + "rewards/margins": 0.4574218690395355, + "rewards/rejected": 0.20273438096046448, + "step": 10, + "train_speed(iter/s)": 0.078543 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": 
-1.6171875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.77734375, + "eval_nll_loss": 0.5546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.75, + "eval_rewards/margins": 1.3984375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.3928, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.6231857601470785, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.618749976158142, + "logps/chosen": -600.0, + "logps/rejected": -515.5999755859375, + "loss": 1.06064453125, + "memory(GiB)": 56.43, + "nll_loss": 0.7710937261581421, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.4437499046325684, + "rewards/margins": 1.9249999523162842, + "rewards/rejected": 1.5187499523162842, + "step": 15, + "train_speed(iter/s)": 0.07979 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7033851800529384, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.623437523841858, + "logps/chosen": -374.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.7858154296875, + "memory(GiB)": 56.43, + "nll_loss": 0.673046886920929, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.862500190734863, + "rewards/margins": 3.2562499046325684, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.080777 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3515625, + "eval_nll_loss": 0.32421875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.25, + "eval_rewards/margins": 4.375, + "eval_rewards/rejected": 4.84375, + "eval_runtime": 4.3823, + "eval_samples_per_second": 0.913, + 
"eval_steps_per_second": 0.228, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.1907563826066778, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -543.5999755859375, + "logps/rejected": -643.7999877929688, + "loss": 0.5309295654296875, + "memory(GiB)": 56.43, + "nll_loss": 0.516406238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.918749809265137, + "rewards/margins": 6.724999904632568, + "rewards/rejected": 1.191796898841858, + "step": 25, + "train_speed(iter/s)": 0.07974 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.8601038031625003, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -400.79998779296875, + "logps/rejected": -584.4000244140625, + "loss": 0.47745361328125, + "memory(GiB)": 56.43, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5625, + "rewards/margins": 9.362500190734863, + "rewards/rejected": -0.774609386920929, + "step": 30, + "train_speed(iter/s)": 0.080007 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -141.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.3046875, + "eval_nll_loss": 0.3046875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.9375, + "eval_rewards/margins": 9.6875, + "eval_rewards/rejected": 0.25, + "eval_runtime": 4.4114, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.43138365725560973, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7312500476837158, + "logits/rejected": -1.7765624523162842, + "logps/chosen": -612.2000122070312, + "logps/rejected": -616.0, + "loss": 0.53447265625, + "memory(GiB)": 56.43, + "nll_loss": 0.5328124761581421, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 9.399999618530273, + "rewards/margins": 9.149999618530273, + "rewards/rejected": 0.22822265326976776, + "step": 35, + "train_speed(iter/s)": 0.079054 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.4899916200761263, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.759374976158142, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.506884765625, + "memory(GiB)": 56.43, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.25, + "rewards/margins": 10.175000190734863, + "rewards/rejected": 0.07539062201976776, + "step": 40, + "train_speed(iter/s)": 0.079664 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.75, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -135.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.291259765625, + "eval_nll_loss": 0.291015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.5, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3597, + "eval_samples_per_second": 0.917, + "eval_steps_per_second": 0.229, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.5474321289483255, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -439.6000061035156, + "logps/rejected": -661.2000122070312, + "loss": 0.425408935546875, + "memory(GiB)": 57.88, + "nll_loss": 0.4253906309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.274999618530273, + "rewards/margins": 11.25, + "rewards/rejected": 0.04960937425494194, + "step": 45, + "train_speed(iter/s)": 0.078852 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4769138301102208, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.7843749523162842, + "logits/rejected": -1.818750023841858, + "logps/chosen": -488.79998779296875, + 
"logps/rejected": -612.4000244140625, + "loss": 0.4717376708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4710937440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.949999809265137, + "rewards/margins": 11.824999809265137, + "rewards/rejected": 0.13237304985523224, + "step": 50, + "train_speed(iter/s)": 0.079395 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.78125, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -131.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.283935546875, + "eval_nll_loss": 0.283203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.875, + "eval_rewards/margins": 8.75, + "eval_rewards/rejected": 2.109375, + "eval_runtime": 4.4202, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.16456472919965845, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.732812523841858, + "logps/chosen": -465.6000061035156, + "logps/rejected": -570.4000244140625, + "loss": 0.45333251953125, + "memory(GiB)": 57.88, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.800000190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 0.517578125, + "step": 55, + "train_speed(iter/s)": 0.079928 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13988896145691967, + "learning_rate": 5e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -504.3999938964844, + "logps/rejected": -441.6000061035156, + "loss": 0.41478271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.4144531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.5625, + "rewards/margins": 13.162500381469727, + "rewards/rejected": 0.392578125, + "step": 60, + "train_speed(iter/s)": 0.080587 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.296875, 
+ "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1048.0, + "eval_loss": 0.282958984375, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 2.5, + "eval_runtime": 4.3947, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.26327409929500534, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.6953125, + "logits/rejected": -1.671875, + "logps/chosen": -445.3999938964844, + "logps/rejected": -494.0, + "loss": 0.402728271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.40156251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.25, + "rewards/margins": 13.037500381469727, + "rewards/rejected": 0.18845824897289276, + "step": 65, + "train_speed(iter/s)": 0.08052 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2544494877535854, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.6796875, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.3999938964844, + "logps/rejected": -596.0, + "loss": 0.433489990234375, + "memory(GiB)": 57.88, + "nll_loss": 0.43281251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.425000190734863, + "rewards/margins": 13.75, + "rewards/rejected": -0.3185058534145355, + "step": 70, + "train_speed(iter/s)": 0.080662 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.6953125, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 10.375, + "eval_rewards/rejected": 0.703125, + "eval_runtime": 4.4096, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.4107814395722938, + "learning_rate": 
2.886908691296504e-05, + "logits/chosen": -1.6296875476837158, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -498.3999938964844, + "logps/rejected": -641.2000122070312, + "loss": 0.4626708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4625000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 14.0, + "rewards/rejected": -0.741406261920929, + "step": 75, + "train_speed(iter/s)": 0.080513 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3636613929853374, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.712499976158142, + "logits/rejected": -1.649999976158142, + "logps/chosen": -427.6000061035156, + "logps/rejected": -590.7999877929688, + "loss": 0.4591552734375, + "memory(GiB)": 57.88, + "nll_loss": 0.4769531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.100000381469727, + "rewards/margins": 14.237500190734863, + "rewards/rejected": -1.1325194835662842, + "step": 80, + "train_speed(iter/s)": 0.080112 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 10.6875, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.43974717734245733, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.618749976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -380.3999938964844, + "logps/rejected": -703.2000122070312, + "loss": 0.3943389892578125, + "memory(GiB)": 57.88, + "nll_loss": 0.3941406309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.050000190734863, + "rewards/margins": 13.350000381469727, + "rewards/rejected": 
-0.31855469942092896, + "step": 85, + "train_speed(iter/s)": 0.080127 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21291018894177471, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.603124976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -482.79998779296875, + "logps/rejected": -551.5999755859375, + "loss": 0.42427978515625, + "memory(GiB)": 57.88, + "nll_loss": 0.42460936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.637499809265137, + "rewards/margins": 14.949999809265137, + "rewards/rejected": -0.3154296875, + "step": 90, + "train_speed(iter/s)": 0.080807 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.277587890625, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.75, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3944, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.14794669988653775, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.6640625, + "logps/chosen": -510.79998779296875, + "logps/rejected": -682.0, + "loss": 0.445965576171875, + "memory(GiB)": 57.88, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.787500381469727, + "rewards/margins": 16.237499237060547, + "rewards/rejected": -1.454687476158142, + "step": 95, + "train_speed(iter/s)": 0.080283 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.26486420620313644, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.6328125, + "logits/rejected": -1.6328125, + "logps/chosen": -397.3999938964844, + "logps/rejected": -527.2000122070312, + "loss": 0.393603515625, + "memory(GiB)": 67.13, + "nll_loss": 0.3935546875, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 13.162500381469727, + "rewards/margins": 14.725000381469727, + "rewards/rejected": -1.5671875476837158, + "step": 100, + "train_speed(iter/s)": 0.080739 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.27783203125, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.8125, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.4191, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.41604751594338824, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.6281249523162842, + "logits/rejected": -1.701562523841858, + "logps/chosen": -428.8500061035156, + "logps/rejected": -531.5999755859375, + "loss": 0.4190277099609375, + "memory(GiB)": 67.13, + "nll_loss": 0.4195312559604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.8125, + "rewards/margins": 14.725000381469727, + "rewards/rejected": -0.927734375, + "step": 105, + "train_speed(iter/s)": 0.08056 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.2965654266316545, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.6671874523162842, + "logits/rejected": -1.6875, + "logps/chosen": -446.0, + "logps/rejected": -573.7999877929688, + "loss": 0.3798828125, + "memory(GiB)": 67.13, + "nll_loss": 0.3798828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.4375, + "rewards/margins": 15.862500190734863, + "rewards/rejected": -1.431249976158142, + "step": 110, + "train_speed(iter/s)": 0.080742 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.277587890625, + "eval_nll_loss": 0.27734375, + 
"eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.875, + "eval_rewards/rejected": 0.3515625, + "eval_runtime": 4.4168, + "eval_samples_per_second": 0.906, + "eval_steps_per_second": 0.226, + "step": 110 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 463155319799808.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b1fa183495283af14f6d8d5fb1ff6c44483b8cb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d03b97c8a90b75abcac34667608460f42b83e53dabd6e225b8ee6aa53d0d020 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-110/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = 
os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = 
state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37fbac01e884b4737b62d9f8690242edcb865230 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b76d578d32133a04cc265fcff190053aa194e38e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6649eb849425f9ef4b96b49b94c90fa9ca7b3f79447f6c9e12b5a7e5f276a96 +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..116251445760e59feb4fe2e3015344d18d3f7253 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..28d6ab93ea8099219294657b3305117d67258c99 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20eb79b54322593a09edcaaf6a2487f54bebb0cc38d4d59a109e6f22905415f +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..af50b31dcd90aa3d9894221430ec195f2ab01ae9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956750cd2d9fc7fb7daa0fbb4ad9c44d046ba671722e17e67b049ee16733a1c6 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a746d061070044a6b8dffa11e540e1d72e5da2e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b9c3b8793760ec7c15009d1e62237575c12e4069605bc1607da0a091d84e28 +size 100667312 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fbad0bf2023f41be8ceb1c79ebb46765b615218 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a4fcbabcc088e8f71fff65a5f8706cd0f3a08fb93b2902d49486eb561f6ea1 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fc57b5c22324607956d6afea4a7643f1daa399b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a6c5f99afaaffe6ab9ea07301c7773016a686cb7996e16db22df5cbb66faf4 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..c137e006e2b22265848e56439e0a35fee956cde3 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2443b04289ad9135c75bc2b73cc62bf0248978161b04e22385369a4c8d05c950 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b05d84b29a5911e37c0240103cc238f06f2fe6f2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726642f1ee038ceb5592c52edc1c06d46ef2a366ed48a040d0234180b97f498c +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..532bc67a866eb16f39a4a257fc79d72ac3bda5e5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:671b2a18adaabf099d7fab8a923a51e454599dd63620a2d3ccf20871e0a9f6db +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53dc9a6bcc4e6233069b7970c0820e8435e16813 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54095c99836ae030118e2e030bc7eea5f651d02e644a0a76212771adf9d43fb6 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc50b2a8cc153aaccde56f64d7b0e4c72ff0ba09 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38c8e755f3e36ce3faa3282733c9a7713b4e9fb6b7c76449877650009b7273b2 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfa3ae9fa4b8746734022a7e3ed29ee5ba3143da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a9739ed8863b740a5e9414e826780073db7ac9bc6d0a35f9c61f85e9df80f7 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc0f8ac127697dc96736d7d6d1d41156f9e4f29a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473f65edb4ddb09b0c01b52e7029aa922171edbf07c0e70628012b5f5bc1bd7c +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ac70ad5d11dd3e2a380903dcd5cbb7b3cad9f69 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a0af1b4843d25f0e5fae735a8996e55d4c657bcd930240e80786f2ba0f694e +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..072e23fe09240f540de4effed0c74b58be373e17 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd5a00ccd32301ab09ca14e493b34750056e34c8c69eb1403fab1bd6efc836a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4298ea5ce265d5347a67f5d5b5f8940a76d317be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c938ce4ac141fdc6d26f69e85373475e7726d2f5fb964586cc8016d96d1710 +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d98961c8fe7a7d1b4c8d1b0fcad732077dd62614 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ed8f3af49e6ad29ddd7590f0827044c41016e508ba568599cc7081af5dc423 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/latest new file mode 100644 index 0000000000000000000000000000000000000000..aad80f76777fd4d23b0b81026f4601524335cbe1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/latest @@ -0,0 +1 @@ +global_step114 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..80f51268a9828e9592a20d8ae8b2cd4ba4bc362c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194 +size 
15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..62e581603d525611f9660b6e859462f72bbc9258 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..18b03e632222a58e33ea4fca874b9c52628cc5e1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..41735b5f7ace13ffa57ebed3e7042f1a48ac17fb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebbb93c1d99b1645075ea27fc9fae66992a691f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0656f84b77a33c9ceba9df16f36437b55ef71bc7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..32b3a313372ee4a2eeaeed69789f8fb4e2c70ad0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_6.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c5a45264129fe1d7c409a6867de1a9751476a8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a86ac614a477eb67963adb2c8c07f37c79ded059 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7a9fd18bda7faa50931342147a7de5605bed0f91f6c70d821e84b7bf8f444f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..95cc2ed50c021aa1fc8e7af1749530c4d47e011a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/trainer_state.json @@ -0,0 +1,651 @@ 
+{ + "best_metric": 0.27734375, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.767905865350689, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.059163 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.720924346118727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.640625, + "logps/chosen": -676.25, + "logps/rejected": -513.5, + "loss": 1.839111328125, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.082916259765625, + "rewards/margins": 0.09368896484375, + "rewards/rejected": -0.0110015869140625, + "step": 5, + "train_speed(iter/s)": 0.080606 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.536016569461848, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5046875476837158, + "logits/rejected": -1.5343749523162842, + "logps/chosen": -572.0, + "logps/rejected": -645.5999755859375, + "loss": 1.83935546875, + "memory(GiB)": 56.43, + "nll_loss": 1.3250000476837158, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.6597656011581421, + "rewards/margins": 0.4574218690395355, + "rewards/rejected": 0.20273438096046448, + "step": 10, + "train_speed(iter/s)": 0.078543 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + 
"eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.77734375, + "eval_nll_loss": 0.5546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.75, + "eval_rewards/margins": 1.3984375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.3928, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.6231857601470785, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.618749976158142, + "logps/chosen": -600.0, + "logps/rejected": -515.5999755859375, + "loss": 1.06064453125, + "memory(GiB)": 56.43, + "nll_loss": 0.7710937261581421, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.4437499046325684, + "rewards/margins": 1.9249999523162842, + "rewards/rejected": 1.5187499523162842, + "step": 15, + "train_speed(iter/s)": 0.07979 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7033851800529384, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.623437523841858, + "logps/chosen": -374.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.7858154296875, + "memory(GiB)": 56.43, + "nll_loss": 0.673046886920929, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.862500190734863, + "rewards/margins": 3.2562499046325684, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.080777 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3515625, + "eval_nll_loss": 0.32421875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.25, + "eval_rewards/margins": 4.375, + "eval_rewards/rejected": 4.84375, + "eval_runtime": 4.3823, + "eval_samples_per_second": 0.913, + "eval_steps_per_second": 0.228, + 
"step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 1.1907563826066778, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -543.5999755859375, + "logps/rejected": -643.7999877929688, + "loss": 0.5309295654296875, + "memory(GiB)": 56.43, + "nll_loss": 0.516406238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.918749809265137, + "rewards/margins": 6.724999904632568, + "rewards/rejected": 1.191796898841858, + "step": 25, + "train_speed(iter/s)": 0.07974 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.8601038031625003, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -400.79998779296875, + "logps/rejected": -584.4000244140625, + "loss": 0.47745361328125, + "memory(GiB)": 56.43, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5625, + "rewards/margins": 9.362500190734863, + "rewards/rejected": -0.774609386920929, + "step": 30, + "train_speed(iter/s)": 0.080007 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -141.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.3046875, + "eval_nll_loss": 0.3046875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.9375, + "eval_rewards/margins": 9.6875, + "eval_rewards/rejected": 0.25, + "eval_runtime": 4.4114, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.43138365725560973, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7312500476837158, + "logits/rejected": -1.7765624523162842, + "logps/chosen": -612.2000122070312, + "logps/rejected": -616.0, + "loss": 0.53447265625, + "memory(GiB)": 56.43, + "nll_loss": 0.5328124761581421, + "rewards/accuracies": 1.0, + "rewards/chosen": 
9.399999618530273, + "rewards/margins": 9.149999618530273, + "rewards/rejected": 0.22822265326976776, + "step": 35, + "train_speed(iter/s)": 0.079054 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.4899916200761263, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.759374976158142, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.506884765625, + "memory(GiB)": 56.43, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.25, + "rewards/margins": 10.175000190734863, + "rewards/rejected": 0.07539062201976776, + "step": 40, + "train_speed(iter/s)": 0.079664 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.75, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -135.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.291259765625, + "eval_nll_loss": 0.291015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.5, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3597, + "eval_samples_per_second": 0.917, + "eval_steps_per_second": 0.229, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.5474321289483255, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -439.6000061035156, + "logps/rejected": -661.2000122070312, + "loss": 0.425408935546875, + "memory(GiB)": 57.88, + "nll_loss": 0.4253906309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.274999618530273, + "rewards/margins": 11.25, + "rewards/rejected": 0.04960937425494194, + "step": 45, + "train_speed(iter/s)": 0.078852 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4769138301102208, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.7843749523162842, + "logits/rejected": -1.818750023841858, + "logps/chosen": -488.79998779296875, + "logps/rejected": -612.4000244140625, + "loss": 
0.4717376708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4710937440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.949999809265137, + "rewards/margins": 11.824999809265137, + "rewards/rejected": 0.13237304985523224, + "step": 50, + "train_speed(iter/s)": 0.079395 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.78125, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -131.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.283935546875, + "eval_nll_loss": 0.283203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.875, + "eval_rewards/margins": 8.75, + "eval_rewards/rejected": 2.109375, + "eval_runtime": 4.4202, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.16456472919965845, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.732812523841858, + "logps/chosen": -465.6000061035156, + "logps/rejected": -570.4000244140625, + "loss": 0.45333251953125, + "memory(GiB)": 57.88, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.800000190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 0.517578125, + "step": 55, + "train_speed(iter/s)": 0.079928 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13988896145691967, + "learning_rate": 5e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -504.3999938964844, + "logps/rejected": -441.6000061035156, + "loss": 0.41478271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.4144531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.5625, + "rewards/margins": 13.162500381469727, + "rewards/rejected": 0.392578125, + "step": 60, + "train_speed(iter/s)": 0.080587 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -130.0, + 
"eval_logps/rejected": -1048.0, + "eval_loss": 0.282958984375, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 2.5, + "eval_runtime": 4.3947, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.26327409929500534, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.6953125, + "logits/rejected": -1.671875, + "logps/chosen": -445.3999938964844, + "logps/rejected": -494.0, + "loss": 0.402728271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.40156251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.25, + "rewards/margins": 13.037500381469727, + "rewards/rejected": 0.18845824897289276, + "step": 65, + "train_speed(iter/s)": 0.08052 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2544494877535854, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.6796875, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.3999938964844, + "logps/rejected": -596.0, + "loss": 0.433489990234375, + "memory(GiB)": 57.88, + "nll_loss": 0.43281251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.425000190734863, + "rewards/margins": 13.75, + "rewards/rejected": -0.3185058534145355, + "step": 70, + "train_speed(iter/s)": 0.080662 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.6953125, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 10.375, + "eval_rewards/rejected": 0.703125, + "eval_runtime": 4.4096, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.4107814395722938, + "learning_rate": 2.886908691296504e-05, + 
"logits/chosen": -1.6296875476837158, + "logits/rejected": -1.6124999523162842, + "logps/chosen": -498.3999938964844, + "logps/rejected": -641.2000122070312, + "loss": 0.4626708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4625000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 14.0, + "rewards/rejected": -0.741406261920929, + "step": 75, + "train_speed(iter/s)": 0.080513 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3636613929853374, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.712499976158142, + "logits/rejected": -1.649999976158142, + "logps/chosen": -427.6000061035156, + "logps/rejected": -590.7999877929688, + "loss": 0.4591552734375, + "memory(GiB)": 57.88, + "nll_loss": 0.4769531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.100000381469727, + "rewards/margins": 14.237500190734863, + "rewards/rejected": -1.1325194835662842, + "step": 80, + "train_speed(iter/s)": 0.080112 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 10.6875, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.43974717734245733, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.618749976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -380.3999938964844, + "logps/rejected": -703.2000122070312, + "loss": 0.3943389892578125, + "memory(GiB)": 57.88, + "nll_loss": 0.3941406309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.050000190734863, + "rewards/margins": 13.350000381469727, + "rewards/rejected": -0.31855469942092896, + 
"step": 85, + "train_speed(iter/s)": 0.080127 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21291018894177471, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.603124976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -482.79998779296875, + "logps/rejected": -551.5999755859375, + "loss": 0.42427978515625, + "memory(GiB)": 57.88, + "nll_loss": 0.42460936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.637499809265137, + "rewards/margins": 14.949999809265137, + "rewards/rejected": -0.3154296875, + "step": 90, + "train_speed(iter/s)": 0.080807 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.277587890625, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.75, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3944, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.14794669988653775, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.6640625, + "logps/chosen": -510.79998779296875, + "logps/rejected": -682.0, + "loss": 0.445965576171875, + "memory(GiB)": 57.88, + "nll_loss": 0.4457031190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.787500381469727, + "rewards/margins": 16.237499237060547, + "rewards/rejected": -1.454687476158142, + "step": 95, + "train_speed(iter/s)": 0.080283 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.26486420620313644, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -1.6328125, + "logits/rejected": -1.6328125, + "logps/chosen": -397.3999938964844, + "logps/rejected": -527.2000122070312, + "loss": 0.393603515625, + "memory(GiB)": 67.13, + "nll_loss": 0.3935546875, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 13.162500381469727, + "rewards/margins": 14.725000381469727, + "rewards/rejected": -1.5671875476837158, + "step": 100, + "train_speed(iter/s)": 0.080739 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.27783203125, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.8125, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.4191, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.41604751594338824, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -1.6281249523162842, + "logits/rejected": -1.701562523841858, + "logps/chosen": -428.8500061035156, + "logps/rejected": -531.5999755859375, + "loss": 0.4190277099609375, + "memory(GiB)": 67.13, + "nll_loss": 0.4195312559604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.8125, + "rewards/margins": 14.725000381469727, + "rewards/rejected": -0.927734375, + "step": 105, + "train_speed(iter/s)": 0.08056 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.2965654266316545, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -1.6671874523162842, + "logits/rejected": -1.6875, + "logps/chosen": -446.0, + "logps/rejected": -573.7999877929688, + "loss": 0.3798828125, + "memory(GiB)": 67.13, + "nll_loss": 0.3798828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.4375, + "rewards/margins": 15.862500190734863, + "rewards/rejected": -1.431249976158142, + "step": 110, + "train_speed(iter/s)": 0.080742 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.277587890625, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + 
"eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.875, + "eval_rewards/rejected": 0.3515625, + "eval_runtime": 4.4168, + "eval_samples_per_second": 0.906, + "eval_steps_per_second": 0.226, + "step": 110 + }, + { + "epoch": 3.0, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.27734375, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.1875, + "eval_rewards/margins": 10.875, + "eval_rewards/rejected": 0.3515625, + "eval_runtime": 4.3362, + "eval_samples_per_second": 0.922, + "eval_steps_per_second": 0.231, + "step": 114 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 479640253628416.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b1fa183495283af14f6d8d5fb1ff6c44483b8cb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d03b97c8a90b75abcac34667608460f42b83e53dabd6e225b8ee6aa53d0d020 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37fbac01e884b4737b62d9f8690242edcb865230 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7188ec17ac77ee71567e05d4edba5c415eb60a66 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c24def4420ab810d81f64251c1a38f49aa038cb516614de2a4ffa055c39c81 +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..116251445760e59feb4fe2e3015344d18d3f7253 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee16afbb29dad9a19dc10e4e24152d727694517b --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:140b10b8152b0798acabd327992b0bf82485ff2f0455d1fdc18647bad2b6b037 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c78b6e2823c6dbbe9b3cb47131366df781e454f7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73273389faa77c9c0644532623a65b5d65eb90fcc957f3b577ffa4936c81f8b4 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6d7a1a5bce43448c5b371cc8e59f23e0b142a37 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e39964ef400cb7821e384bbe08c5d1cb4c7b5620c74d6fb9d1667494082729d +size 100667312 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b979dccdb70c273231d1a0e0ace3e752962ef49 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff0e3a2d07b52e21e77910610ca1b416d94c6a89fce9f3424b771e41461883c +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cfb27196b64bdcc869423347df9b4a9c7f856f9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9707deb19232db1c33bb6118854a4de00a0924154e7a327ecfc4bb55f24aeaa6 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..a889f7167f54f96cef4ed68283001541185d84a9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb9423bc41bb89b846e0787a808315e231ace7f3bb70e0c9982090c08066d24 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bcac557f1995eb2d066fb0a2dc7269c8d168e64 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30fd0550072704485ab8fef4b8c7871f9d5362fab24f0de3e5c3132815087305 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f69ac79be2cb3fb402721b522478b112cea56475 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b73c16b6df795c246d9f4a691f95ae4e4ae7921aeee6b8d0fbbe62be18965702 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..84f6e05655138cb2c183077640365bbdb4d53984 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f36d9b56d77055f4f1455f7dbba9bef4402905acf28686157836702329d45a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d47af3d97f6b0e26c2c92bf7dd281ad184f4dee3 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f7d1f9bbd4a2c78ed38a2087cb38ecdb6e8260c7318a6d76920b2dc1261e32 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f56de979efa68b215b4eead190912770fe5dea33 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0d43d9513d732b53c7ec45ce13e033bd48cf37dd35e17b71a3b698f960fe37 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dce5eda351453c8a351aa5379b8f519f8291bc38 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e88dd8b692def807027787e5975f23ec8bec98f15ac6f1f2753dccf64c6f98a +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9337d6678ad945148a0b7d566151166d9056770a --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81bfc06d39b12eb5f9736867ace7484df1b7f11843c1a3fc7933303d58736a03 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..868b6b65b8b93da7dfb3eb35852eb5baf3cd2715 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746f3dbbe62e70fe59fef4f91064c45a4351929431dad77b3aa9a0fe8ce82df7 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb315f9ade2849d0d5f5d4549949f173d0f7305d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a8daf139a8e67152e96758cf6a94bdd58c880092af4299205e60bdd19f283a +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8312aa35b8dd8dd914e61d5b20963f3f5a5dce54 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301c209ab202ee9b6b96f1857aa3a9cfcbf8f1b971f20e9f8d21d0ca0f959b93 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/latest new file mode 100644 index 0000000000000000000000000000000000000000..75eab498d0366633484ab40334e4b8fb92b16dad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/latest @@ -0,0 +1 @@ +global_step80 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b8b4067e4559b34f9b554c4963fe80d7f5fe839 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4c26c615bd5830d41566fab54dc69174be292761b34514b27fbe82b45b630b +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0265e51b5761ac9b323aa87ba00ba14b97e202 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c761d7f9b90c29c2d348a1133fd39be52c65e6bee4c2d179f6a6e564eb3a40 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5dd5aabcd6e7332f14a4796d6ec6c758e10aea0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccab847cc956e055fd3f9dcce06898826d065211e945b83576c8d487f87c5469 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcbdea3a573c2b7717f23e2ea0e4a6da6670d65d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e5f1dbdcf6ec820c22fd1e4258fcd7af2a2bce65c480988d3f111aa574c9c06 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cdcb8d1710063a6c30dec635b4c44e3cb6cd24e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a23184c3e806d2649776427d1da2c0c9137f9b23a84468f3bdd5bbc75f696c9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e39323a662c284cd109b5ce8c39e8a0ce375f2c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382fc01b809542bf6f5e26742e3e19e80a1f189ac5de24cf8cd822e303916b83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ce685d2e57181f70debfb25eb90cb76ceaf47da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b178265c7d2ae07bff10b7312e5e49b9f5b4914c38969d2f64a6ca006296bca +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e363b8083cdd817e0b3a2e6fd1b65a905e189b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668825a859126c4cf32afb883895c91004130b6aee02178736ca2840e5429ad0 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaf96d6803aea265d756d902db3c4cc2386f9742 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90524bcdb94734ac7120e4205110f14662bff8cee00eed50355875dcdc538029 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..626c509d9727e2f8cc837329b7cb11701caefd98 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/trainer_state.json @@ -0,0 +1,475 @@ +{ + "best_metric": 0.27905273, + "best_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80", + "epoch": 2.1052631578947367, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.767905865350689, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.059163 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.720924346118727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.640625, + "logps/chosen": -676.25, + "logps/rejected": -513.5, + "loss": 1.839111328125, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.082916259765625, + "rewards/margins": 0.09368896484375, + "rewards/rejected": -0.0110015869140625, + "step": 5, + "train_speed(iter/s)": 0.080606 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.536016569461848, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5046875476837158, + "logits/rejected": -1.5343749523162842, + "logps/chosen": -572.0, + "logps/rejected": -645.5999755859375, + "loss": 1.83935546875, + "memory(GiB)": 56.43, + "nll_loss": 1.3250000476837158, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.6597656011581421, + "rewards/margins": 0.4574218690395355, + "rewards/rejected": 0.20273438096046448, + "step": 10, + "train_speed(iter/s)": 0.078543 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + "eval_logits/rejected": -1.2890625, + 
"eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.77734375, + "eval_nll_loss": 0.5546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.75, + "eval_rewards/margins": 1.3984375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.3928, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.6231857601470785, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.618749976158142, + "logps/chosen": -600.0, + "logps/rejected": -515.5999755859375, + "loss": 1.06064453125, + "memory(GiB)": 56.43, + "nll_loss": 0.7710937261581421, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.4437499046325684, + "rewards/margins": 1.9249999523162842, + "rewards/rejected": 1.5187499523162842, + "step": 15, + "train_speed(iter/s)": 0.07979 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7033851800529384, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.623437523841858, + "logps/chosen": -374.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.7858154296875, + "memory(GiB)": 56.43, + "nll_loss": 0.673046886920929, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.862500190734863, + "rewards/margins": 3.2562499046325684, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.080777 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3515625, + "eval_nll_loss": 0.32421875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.25, + "eval_rewards/margins": 4.375, + "eval_rewards/rejected": 4.84375, + "eval_runtime": 4.3823, + "eval_samples_per_second": 0.913, + "eval_steps_per_second": 0.228, + "step": 20 + }, + { + "epoch": 
0.6578947368421053, + "grad_norm": 1.1907563826066778, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -543.5999755859375, + "logps/rejected": -643.7999877929688, + "loss": 0.5309295654296875, + "memory(GiB)": 56.43, + "nll_loss": 0.516406238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.918749809265137, + "rewards/margins": 6.724999904632568, + "rewards/rejected": 1.191796898841858, + "step": 25, + "train_speed(iter/s)": 0.07974 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.8601038031625003, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -400.79998779296875, + "logps/rejected": -584.4000244140625, + "loss": 0.47745361328125, + "memory(GiB)": 56.43, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5625, + "rewards/margins": 9.362500190734863, + "rewards/rejected": -0.774609386920929, + "step": 30, + "train_speed(iter/s)": 0.080007 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -141.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.3046875, + "eval_nll_loss": 0.3046875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.9375, + "eval_rewards/margins": 9.6875, + "eval_rewards/rejected": 0.25, + "eval_runtime": 4.4114, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.43138365725560973, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7312500476837158, + "logits/rejected": -1.7765624523162842, + "logps/chosen": -612.2000122070312, + "logps/rejected": -616.0, + "loss": 0.53447265625, + "memory(GiB)": 56.43, + "nll_loss": 0.5328124761581421, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.399999618530273, + 
"rewards/margins": 9.149999618530273, + "rewards/rejected": 0.22822265326976776, + "step": 35, + "train_speed(iter/s)": 0.079054 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.4899916200761263, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.759374976158142, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.506884765625, + "memory(GiB)": 56.43, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.25, + "rewards/margins": 10.175000190734863, + "rewards/rejected": 0.07539062201976776, + "step": 40, + "train_speed(iter/s)": 0.079664 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.75, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -135.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.291259765625, + "eval_nll_loss": 0.291015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.5, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3597, + "eval_samples_per_second": 0.917, + "eval_steps_per_second": 0.229, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.5474321289483255, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -439.6000061035156, + "logps/rejected": -661.2000122070312, + "loss": 0.425408935546875, + "memory(GiB)": 57.88, + "nll_loss": 0.4253906309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.274999618530273, + "rewards/margins": 11.25, + "rewards/rejected": 0.04960937425494194, + "step": 45, + "train_speed(iter/s)": 0.078852 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4769138301102208, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.7843749523162842, + "logits/rejected": -1.818750023841858, + "logps/chosen": -488.79998779296875, + "logps/rejected": -612.4000244140625, + "loss": 0.4717376708984375, + 
"memory(GiB)": 57.88, + "nll_loss": 0.4710937440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.949999809265137, + "rewards/margins": 11.824999809265137, + "rewards/rejected": 0.13237304985523224, + "step": 50, + "train_speed(iter/s)": 0.079395 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.78125, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -131.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.283935546875, + "eval_nll_loss": 0.283203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.875, + "eval_rewards/margins": 8.75, + "eval_rewards/rejected": 2.109375, + "eval_runtime": 4.4202, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.16456472919965845, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.732812523841858, + "logps/chosen": -465.6000061035156, + "logps/rejected": -570.4000244140625, + "loss": 0.45333251953125, + "memory(GiB)": 57.88, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.800000190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 0.517578125, + "step": 55, + "train_speed(iter/s)": 0.079928 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13988896145691967, + "learning_rate": 5e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -504.3999938964844, + "logps/rejected": -441.6000061035156, + "loss": 0.41478271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.4144531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.5625, + "rewards/margins": 13.162500381469727, + "rewards/rejected": 0.392578125, + "step": 60, + "train_speed(iter/s)": 0.080587 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1048.0, + 
"eval_loss": 0.282958984375, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 2.5, + "eval_runtime": 4.3947, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.26327409929500534, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.6953125, + "logits/rejected": -1.671875, + "logps/chosen": -445.3999938964844, + "logps/rejected": -494.0, + "loss": 0.402728271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.40156251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.25, + "rewards/margins": 13.037500381469727, + "rewards/rejected": 0.18845824897289276, + "step": 65, + "train_speed(iter/s)": 0.08052 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2544494877535854, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.6796875, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.3999938964844, + "logps/rejected": -596.0, + "loss": 0.433489990234375, + "memory(GiB)": 57.88, + "nll_loss": 0.43281251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.425000190734863, + "rewards/margins": 13.75, + "rewards/rejected": -0.3185058534145355, + "step": 70, + "train_speed(iter/s)": 0.080662 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.6953125, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 10.375, + "eval_rewards/rejected": 0.703125, + "eval_runtime": 4.4096, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.4107814395722938, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.6296875476837158, + 
"logits/rejected": -1.6124999523162842, + "logps/chosen": -498.3999938964844, + "logps/rejected": -641.2000122070312, + "loss": 0.4626708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4625000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 14.0, + "rewards/rejected": -0.741406261920929, + "step": 75, + "train_speed(iter/s)": 0.080513 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3636613929853374, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.712499976158142, + "logits/rejected": -1.649999976158142, + "logps/chosen": -427.6000061035156, + "logps/rejected": -590.7999877929688, + "loss": 0.4591552734375, + "memory(GiB)": 57.88, + "nll_loss": 0.4769531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.100000381469727, + "rewards/margins": 14.237500190734863, + "rewards/rejected": -1.1325194835662842, + "step": 80, + "train_speed(iter/s)": 0.080112 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 10.6875, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 338904851939328.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b1fa183495283af14f6d8d5fb1ff6c44483b8cb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d03b97c8a90b75abcac34667608460f42b83e53dabd6e225b8ee6aa53d0d020 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-80/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/README.md b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bb68a7d6caeadf83c94bf41a98174b4994de1bfb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-32b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37fbac01e884b4737b62d9f8690242edcb865230 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4b82e9dae385d39698316e1434ce5c604b79d90 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3a2351870a0fe7714d6c868e5a0c7fd8bd47d1c326b6db5706b59e9a4a96c0 +size 134337704 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/args.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..116251445760e59feb4fe2e3015344d18d3f7253 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-32b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-32b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93b4912e82084ff130ec7f0d2aa8953c933a985b --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a71721cb073bc9515e3417280e984d1e535c9ccf6612a5d8e9dcc6959230652 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d453dba815db384bcc7d8dc69afd000fe418460 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:876c7fb575bb44fd421b57a4309033bb7e273c8dbe02ec007ed3727ea19db293 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e80b2bfe6f3f26b71f3f4d79b2c550ed11e0c497 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccc3f411543c96ce8a38216b2718e445fdb0e65f445601f17ca7262283d9639c +size 100667312 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cf64f03fe34a1cce41713932befe0893a7dfe6e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be4868cc2451616c4baa85e696ffeae1836240dd6859f7115555b04fee6ed70 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b082bdac844993db0c610e2ceee2c4b6b701c17 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7651f024d282a6d50f81045473778bf00dc8b38aaa7626319c97b67675e4ac4 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..498b30b75a7267fcc6f1ba7863b9d4e7ab6a6f23 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7574c806add9f5827a4b67b1c3df983add08c0e8d376d5b8595967ce03260169 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d7ac5d8ed71657564a93f2755430a3405d1e459 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb88c68c1d50abb5fe90f6a159b4e7de9ac24a5b0770e7475057d8a59547487 +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..28969ce5cf88fa8fc7d875b339fa64f0f5a8612f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4dc11f15239c03083e2a7fb93e94a881849ceed80e0862313fe36bde48e9547d +size 100667312 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..62f872fb1d9aaed33a4fc71ee48afe248b499c66 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80390aca50881c077877400c9645fa6442f4ef0b2e7430e27ccbdfee4a7db767 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..86b7237e11d68131a1a2899bb224f927200e26c8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d300157f4c0b0093f99ef18c13feb7498361fe9b3740cedb495b0a69173b48c +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ff5bd63257d675e27ed07007b1452710554b01f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad8c30ed96b214a13f8a0e9c4a41e4e41be0cace28c54dd5e156db1a1d2868d +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2f8b1d8f0a2b705106db457027badd4e5f6ed53 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9c95184ba528a64e814f49fd06a9b5b171eb4fe3c2dd62b7a5667c77d768fe +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b80039a193b53c0577c8ecb6e47c56b6bacd16a8 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b0a9b5b40b7d1b6b7196af3d9c6fe16600aca003357962d9aeee37da034957 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8367f14b958ba2bde1d2b1fc90f1a7459fee1197 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e152f646099d4ccf09689480b9ba7d5ccee1fc1e0958521d2452833758425a6 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3f5fea75edfa3da191b52baaea356a1f300d8d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5801812ee46be8ba79ddc3a15a07734175946500058cbdb7003f35840949419a +size 886254 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..275ede459c9fabbc1e293a34c4f62f03a57b427a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56969a6801036568d5a89aa1d69876922e96bed310078bbf8b1cded5118ce322 +size 886254 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/latest b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/latest new file mode 100644 index 0000000000000000000000000000000000000000..8e7a337e2cb23bf07023d223dd647df2d25f0fc1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/latest @@ -0,0 +1 @@ +global_step90 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e31a2394e12bf431ae13288c3d90fe4727f07fa7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1db0a0f44aa3ac1d82c3bf8dc2d8968eeba4ce7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..75de18f57a056bd6a5f89df1abd045678f3f919e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd100693bc9f3267d044ce4a16e702502dc03ec --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5aeeabfe119f1cb0c8c804f1b9a4d3049f478d69 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..91fe0f42382ab06f4d26d753745a914c9e46100e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5830ca6bd04645962b6e56a00a91cd8349ca449c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132 +size 15984 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..343d1c0475f0dc64100dc67b09195e047f1a7bcf --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a39c9cfeaa2d69cb5a66e83272eee65ddffaed5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b768777109679597db5d1fa24a743962bede33623e22702b13b95eab2d42cb8 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4a520c58c535e5b264df7a6daacadc028992a901 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/trainer_state.json @@ -0,0 +1,528 @@ +{ + "best_metric": 0.27758789, + "best_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90", + "epoch": 2.3684210526315788, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 4.767905865350689, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": -1.609375, + "logits/rejected": -1.609375, + "logps/chosen": -664.0, + "logps/rejected": -370.0, + "loss": 1.2783203125, + "memory(GiB)": 14.31, + "nll_loss": 0.5859375, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.059163 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 5.720924346118727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -1.57421875, + "logits/rejected": -1.640625, + "logps/chosen": -676.25, + "logps/rejected": -513.5, + "loss": 1.839111328125, + "memory(GiB)": 24.25, + "nll_loss": 1.1826171875, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.082916259765625, + "rewards/margins": 0.09368896484375, + "rewards/rejected": -0.0110015869140625, + "step": 5, + "train_speed(iter/s)": 0.080606 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 4.536016569461848, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": -1.5046875476837158, + "logits/rejected": -1.5343749523162842, + "logps/chosen": -572.0, + "logps/rejected": -645.5999755859375, + "loss": 1.83935546875, + "memory(GiB)": 56.43, + "nll_loss": 1.3250000476837158, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.6597656011581421, + "rewards/margins": 0.4574218690395355, + "rewards/rejected": 0.20273438096046448, + "step": 10, + "train_speed(iter/s)": 0.078543 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -1.6171875, + "eval_logits/rejected": -1.2890625, + 
"eval_logps/chosen": -213.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.77734375, + "eval_nll_loss": 0.5546875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 2.75, + "eval_rewards/margins": 1.3984375, + "eval_rewards/rejected": 1.3515625, + "eval_runtime": 4.3928, + "eval_samples_per_second": 0.911, + "eval_steps_per_second": 0.228, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 2.6231857601470785, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -1.578125, + "logits/rejected": -1.618749976158142, + "logps/chosen": -600.0, + "logps/rejected": -515.5999755859375, + "loss": 1.06064453125, + "memory(GiB)": 56.43, + "nll_loss": 0.7710937261581421, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.4437499046325684, + "rewards/margins": 1.9249999523162842, + "rewards/rejected": 1.5187499523162842, + "step": 15, + "train_speed(iter/s)": 0.07979 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 0.7033851800529384, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.623437523841858, + "logps/chosen": -374.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.7858154296875, + "memory(GiB)": 56.43, + "nll_loss": 0.673046886920929, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 6.862500190734863, + "rewards/margins": 3.2562499046325684, + "rewards/rejected": 3.6031250953674316, + "step": 20, + "train_speed(iter/s)": 0.080777 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.2890625, + "eval_logps/chosen": -148.0, + "eval_logps/rejected": -1024.0, + "eval_loss": 0.3515625, + "eval_nll_loss": 0.32421875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.25, + "eval_rewards/margins": 4.375, + "eval_rewards/rejected": 4.84375, + "eval_runtime": 4.3823, + "eval_samples_per_second": 0.913, + "eval_steps_per_second": 0.228, + "step": 20 + }, + { + "epoch": 
0.6578947368421053, + "grad_norm": 1.1907563826066778, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -1.6593749523162842, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -543.5999755859375, + "logps/rejected": -643.7999877929688, + "loss": 0.5309295654296875, + "memory(GiB)": 56.43, + "nll_loss": 0.516406238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 7.918749809265137, + "rewards/margins": 6.724999904632568, + "rewards/rejected": 1.191796898841858, + "step": 25, + "train_speed(iter/s)": 0.07974 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.8601038031625003, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -1.717187523841858, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -400.79998779296875, + "logps/rejected": -584.4000244140625, + "loss": 0.47745361328125, + "memory(GiB)": 56.43, + "nll_loss": 0.47734373807907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 8.5625, + "rewards/margins": 9.362500190734863, + "rewards/rejected": -0.774609386920929, + "step": 30, + "train_speed(iter/s)": 0.080007 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -1.734375, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -141.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.3046875, + "eval_nll_loss": 0.3046875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 9.9375, + "eval_rewards/margins": 9.6875, + "eval_rewards/rejected": 0.25, + "eval_runtime": 4.4114, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 0.43138365725560973, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -1.7312500476837158, + "logits/rejected": -1.7765624523162842, + "logps/chosen": -612.2000122070312, + "logps/rejected": -616.0, + "loss": 0.53447265625, + "memory(GiB)": 56.43, + "nll_loss": 0.5328124761581421, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.399999618530273, + 
"rewards/margins": 9.149999618530273, + "rewards/rejected": 0.22822265326976776, + "step": 35, + "train_speed(iter/s)": 0.079054 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 0.4899916200761263, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -1.678125023841858, + "logits/rejected": -1.759374976158142, + "logps/chosen": -474.0, + "logps/rejected": -646.4000244140625, + "loss": 0.506884765625, + "memory(GiB)": 56.43, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.25, + "rewards/margins": 10.175000190734863, + "rewards/rejected": 0.07539062201976776, + "step": 40, + "train_speed(iter/s)": 0.079664 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.75, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -135.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.291259765625, + "eval_nll_loss": 0.291015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.5, + "eval_rewards/margins": 9.3125, + "eval_rewards/rejected": 1.203125, + "eval_runtime": 4.3597, + "eval_samples_per_second": 0.917, + "eval_steps_per_second": 0.229, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.5474321289483255, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -1.703125, + "logits/rejected": -1.6203124523162842, + "logps/chosen": -439.6000061035156, + "logps/rejected": -661.2000122070312, + "loss": 0.425408935546875, + "memory(GiB)": 57.88, + "nll_loss": 0.4253906309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.274999618530273, + "rewards/margins": 11.25, + "rewards/rejected": 0.04960937425494194, + "step": 45, + "train_speed(iter/s)": 0.078852 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.4769138301102208, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": -1.7843749523162842, + "logits/rejected": -1.818750023841858, + "logps/chosen": -488.79998779296875, + "logps/rejected": -612.4000244140625, + "loss": 0.4717376708984375, + 
"memory(GiB)": 57.88, + "nll_loss": 0.4710937440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.949999809265137, + "rewards/margins": 11.824999809265137, + "rewards/rejected": 0.13237304985523224, + "step": 50, + "train_speed(iter/s)": 0.079395 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.78125, + "eval_logits/rejected": -1.3046875, + "eval_logps/chosen": -131.0, + "eval_logps/rejected": -1056.0, + "eval_loss": 0.283935546875, + "eval_nll_loss": 0.283203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 10.875, + "eval_rewards/margins": 8.75, + "eval_rewards/rejected": 2.109375, + "eval_runtime": 4.4202, + "eval_samples_per_second": 0.905, + "eval_steps_per_second": 0.226, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.16456472919965845, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -1.649999976158142, + "logits/rejected": -1.732812523841858, + "logps/chosen": -465.6000061035156, + "logps/rejected": -570.4000244140625, + "loss": 0.45333251953125, + "memory(GiB)": 57.88, + "nll_loss": 0.4535156190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.800000190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 0.517578125, + "step": 55, + "train_speed(iter/s)": 0.079928 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.13988896145691967, + "learning_rate": 5e-05, + "logits/chosen": -1.65625, + "logits/rejected": -1.6671874523162842, + "logps/chosen": -504.3999938964844, + "logps/rejected": -441.6000061035156, + "loss": 0.41478271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.4144531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.5625, + "rewards/margins": 13.162500381469727, + "rewards/rejected": 0.392578125, + "step": 60, + "train_speed(iter/s)": 0.080587 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.7265625, + "eval_logits/rejected": -1.296875, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1048.0, + 
"eval_loss": 0.282958984375, + "eval_nll_loss": 0.28125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0, + "eval_rewards/margins": 8.5, + "eval_rewards/rejected": 2.5, + "eval_runtime": 4.3947, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.26327409929500534, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -1.6953125, + "logits/rejected": -1.671875, + "logps/chosen": -445.3999938964844, + "logps/rejected": -494.0, + "loss": 0.402728271484375, + "memory(GiB)": 57.88, + "nll_loss": 0.40156251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.25, + "rewards/margins": 13.037500381469727, + "rewards/rejected": 0.18845824897289276, + "step": 65, + "train_speed(iter/s)": 0.08052 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.2544494877535854, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": -1.6796875, + "logits/rejected": -1.701562523841858, + "logps/chosen": -397.3999938964844, + "logps/rejected": -596.0, + "loss": 0.433489990234375, + "memory(GiB)": 57.88, + "nll_loss": 0.43281251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.425000190734863, + "rewards/margins": 13.75, + "rewards/rejected": -0.3185058534145355, + "step": 70, + "train_speed(iter/s)": 0.080662 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.6953125, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -130.0, + "eval_logps/rejected": -1064.0, + "eval_loss": 0.279296875, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.0625, + "eval_rewards/margins": 10.375, + "eval_rewards/rejected": 0.703125, + "eval_runtime": 4.4096, + "eval_samples_per_second": 0.907, + "eval_steps_per_second": 0.227, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.4107814395722938, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -1.6296875476837158, + 
"logits/rejected": -1.6124999523162842, + "logps/chosen": -498.3999938964844, + "logps/rejected": -641.2000122070312, + "loss": 0.4626708984375, + "memory(GiB)": 57.88, + "nll_loss": 0.4625000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.237500190734863, + "rewards/margins": 14.0, + "rewards/rejected": -0.741406261920929, + "step": 75, + "train_speed(iter/s)": 0.080513 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.3636613929853374, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -1.712499976158142, + "logits/rejected": -1.649999976158142, + "logps/chosen": -427.6000061035156, + "logps/rejected": -590.7999877929688, + "loss": 0.4591552734375, + "memory(GiB)": 57.88, + "nll_loss": 0.4769531190395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.100000381469727, + "rewards/margins": 14.237500190734863, + "rewards/rejected": -1.1325194835662842, + "step": 80, + "train_speed(iter/s)": 0.080112 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.6875, + "eval_logits/rejected": -1.28125, + "eval_logps/chosen": -129.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.279052734375, + "eval_nll_loss": 0.279296875, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.125, + "eval_rewards/margins": 10.6875, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3677, + "eval_samples_per_second": 0.916, + "eval_steps_per_second": 0.229, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.43974717734245733, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -1.618749976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -380.3999938964844, + "logps/rejected": -703.2000122070312, + "loss": 0.3943389892578125, + "memory(GiB)": 57.88, + "nll_loss": 0.3941406309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 13.050000190734863, + "rewards/margins": 13.350000381469727, + "rewards/rejected": -0.31855469942092896, + "step": 85, + "train_speed(iter/s)": 0.080127 
+ }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.21291018894177471, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -1.603124976158142, + "logits/rejected": -1.631250023841858, + "logps/chosen": -482.79998779296875, + "logps/rejected": -551.5999755859375, + "loss": 0.42427978515625, + "memory(GiB)": 57.88, + "nll_loss": 0.42460936307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.637499809265137, + "rewards/margins": 14.949999809265137, + "rewards/rejected": -0.3154296875, + "step": 90, + "train_speed(iter/s)": 0.080807 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.703125, + "eval_logits/rejected": -1.2734375, + "eval_logps/chosen": -128.0, + "eval_logps/rejected": -1072.0, + "eval_loss": 0.277587890625, + "eval_nll_loss": 0.27734375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.25, + "eval_rewards/margins": 10.75, + "eval_rewards/rejected": 0.453125, + "eval_runtime": 4.3944, + "eval_samples_per_second": 0.91, + "eval_steps_per_second": 0.228, + "step": 90 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 378617444958208.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b1fa183495283af14f6d8d5fb1ff6c44483b8cb --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d03b97c8a90b75abcac34667608460f42b83e53dabd6e225b8ee6aa53d0d020 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-90/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..86dc55e5d336754a0c03e106a102048fb46e381d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..d621c80b149cbe25d784dfbe877ea56749e508e4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..0d383effe98a54aa68a39ccc9d3ef296aabd327b Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..814f984c43ce96217b81d5fbefebbd147633ec58 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..7db0b18f5afb2f7548d58d8ec8daca431547a7ed Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..9f618aa958dfb70691a2f60c23f3a15cb280f493 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_accuracies.png new file mode 100644 index 
0000000000000000000000000000000000000000..2d2698dd0f6f8ad18db2b94a0c23ae94df643a78 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..1891d4b5310f589d3c6f32ba295f48ac409de657 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..8b6a02da94249be1e4363f8416fbeae932f6e83e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..ba8896ce2de8ad522cacbf1eb081d4e96b4aca59 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_runtime.png 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..d4dd674f3b88414e99f7e2b13fbfb5bb15957bad Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..4d796f5462a682651187eca7036d9437337f4a74 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..50429d57a5c280a8e289bf97462f07c6fc5b34e0 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/eval_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_epoch.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_epoch.png new file mode 100644 index 0000000000000000000000000000000000000000..c152c9bbb1100d680125e899d3d0da3db98dc221 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_epoch.png differ diff --git 
a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_grad_norm.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_grad_norm.png new file mode 100644 index 0000000000000000000000000000000000000000..07b51ef23c3b1d21d206d420ec217473a52ce392 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_grad_norm.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_learning_rate.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..e35e42c173f451f9666ba448872c615528441c9e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_learning_rate.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..c04a03f30dbcb44ac82373481564ad9c06da9532 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..8c9d27c508fb52c9b7749be4fc959090843dfb6e Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..cbb61162a75dca644d4fa78c3342c1b767d20387 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..a136c8a18fa07510fbffba561ab8bd3fb56aa091 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f391976ac015f96c6b779cfcd90e60ef31eadc9e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_memory(GiB).png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_memory(GiB).png new file mode 100644 
index 0000000000000000000000000000000000000000..16a2a7ed7a5a20149fc0f0ddd14639272739619a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_memory(GiB).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..10939029768f99b4f0cc0c05ddd0df512db3a6cf Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..5d079e84bfbd2648e212ae5886fa6ee43c876360 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..81ddf515982db95dbf9988603325c47af2a3eac4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_margins.png 
b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..271223d9567021cb7bdd7922e5b753ef529ce514 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..d71c128aa34fe13a02cc246b8aa2cfe139369619 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_total_flos.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_total_flos.png new file mode 100644 index 0000000000000000000000000000000000000000..bfc59c9dd1f191a363fcc9b67d0538ab92317d82 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_total_flos.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_loss.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..d178395e9149c2032161ea6cea133613c4fd0949 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_loss.png 
differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_runtime.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..a1c45559acaea8e1dd6ed775f230651ca4b0a033 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..90616951ad3fd1e759a728302e189905a17138d9 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_speed(iter_s).png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_speed(iter_s).png new file mode 100644 index 0000000000000000000000000000000000000000..016cdf5331e4c03761f1ac5b1393e878bfb7ed54 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_speed(iter_s).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_steps_per_second.png new file mode 100644 index 
0000000000000000000000000000000000000000..5c3b9c717ae2f3cd532c7a2eb9154696900e21e5 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/images/train_train_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/logging.jsonl b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/logging.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ff434387f4ed6ba702c39008eacb11414f0a8617 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/logging.jsonl @@ -0,0 +1,37 @@ +{"loss": 1.27832031, "grad_norm": 4.76790587, "learning_rate": 1.667e-05, "memory(GiB)": 14.31, "train_speed(iter/s)": 0.059163, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -664.0, "logps/rejected": -370.0, "logits/chosen": -1.609375, "logits/rejected": -1.609375, "nll_loss": 0.5859375, "epoch": 0.02631579, "global_step/max_steps": "1/114", "percentage": "0.88%", "elapsed_time": "12s", "remaining_time": "23m 59s"} +{"loss": 1.83911133, "grad_norm": 5.72092435, "learning_rate": 8.333e-05, "memory(GiB)": 24.25, "train_speed(iter/s)": 0.080606, "rewards/chosen": 0.08291626, "rewards/rejected": -0.01100159, "rewards/accuracies": 0.34375, "rewards/margins": 0.09368896, "logps/chosen": -676.25, "logps/rejected": -513.5, "logits/chosen": -1.57421875, "logits/rejected": -1.640625, "nll_loss": 1.18261719, "epoch": 0.13157895, "global_step/max_steps": "5/114", "percentage": "4.39%", "elapsed_time": "57s", "remaining_time": "21m 1s"} +{"loss": 1.83935547, "grad_norm": 4.53601657, "learning_rate": 9.966e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.078543, "rewards/chosen": 0.6597656, "rewards/rejected": 0.20273438, "rewards/accuracies": 0.72500002, "rewards/margins": 
0.45742187, "logps/chosen": -572.0, "logps/rejected": -645.59997559, "logits/chosen": -1.50468755, "logits/rejected": -1.53437495, "nll_loss": 1.32500005, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "2m 3s", "remaining_time": "21m 20s"} +{"eval_loss": 0.77734375, "eval_runtime": 4.3928, "eval_samples_per_second": 0.911, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 2.75, "eval_rewards/rejected": 1.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 1.3984375, "eval_logps/chosen": -213.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.6171875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.5546875, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "2m 7s", "remaining_time": "22m 6s"} +{"loss": 1.06064453, "grad_norm": 2.62318576, "learning_rate": 9.83e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.07979, "rewards/chosen": 3.4437499, "rewards/rejected": 1.51874995, "rewards/accuracies": 0.89999998, "rewards/margins": 1.92499995, "logps/chosen": -600.0, "logps/rejected": -515.59997559, "logits/chosen": -1.578125, "logits/rejected": -1.61874998, "nll_loss": 0.77109373, "epoch": 0.39473684, "global_step/max_steps": "15/114", "percentage": "13.16%", "elapsed_time": "3m 3s", "remaining_time": "20m 13s"} +{"loss": 0.78581543, "grad_norm": 0.70338518, "learning_rate": 9.591e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.080777, "rewards/chosen": 6.86250019, "rewards/rejected": 3.6031251, "rewards/accuracies": 0.97500002, "rewards/margins": 3.2562499, "logps/chosen": -374.6000061, "logps/rejected": -527.59997559, "logits/chosen": -1.64999998, "logits/rejected": -1.62343752, "nll_loss": 0.67304689, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "4m 3s", "remaining_time": "19m 4s"} +{"eval_loss": 0.3515625, "eval_runtime": 4.3823, "eval_samples_per_second": 0.913, 
"eval_steps_per_second": 0.228, "eval_rewards/chosen": 9.25, "eval_rewards/rejected": 4.84375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 4.375, "eval_logps/chosen": -148.0, "eval_logps/rejected": -1024.0, "eval_logits/chosen": -1.6875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.32421875, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "4m 7s", "remaining_time": "19m 24s"} +{"loss": 0.53092957, "grad_norm": 1.19075638, "learning_rate": 9.256e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.07974, "rewards/chosen": 7.91874981, "rewards/rejected": 1.1917969, "rewards/accuracies": 1.0, "rewards/margins": 6.7249999, "logps/chosen": -543.59997559, "logps/rejected": -643.79998779, "logits/chosen": -1.65937495, "logits/rejected": -1.62031245, "nll_loss": 0.51640624, "epoch": 0.65789474, "global_step/max_steps": "25/114", "percentage": "21.93%", "elapsed_time": "5m 9s", "remaining_time": "18m 21s"} +{"loss": 0.47745361, "grad_norm": 0.8601038, "learning_rate": 8.83e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.080007, "rewards/chosen": 8.5625, "rewards/rejected": -0.77460939, "rewards/accuracies": 1.0, "rewards/margins": 9.36250019, "logps/chosen": -400.79998779, "logps/rejected": -584.40002441, "logits/chosen": -1.71718752, "logits/rejected": -1.66718745, "nll_loss": 0.47734374, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "6m 10s", "remaining_time": "17m 18s"} +{"eval_loss": 0.3046875, "eval_runtime": 4.4114, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 9.9375, "eval_rewards/rejected": 0.25, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.6875, "eval_logps/chosen": -141.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.734375, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.3046875, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", 
"elapsed_time": "6m 15s", "remaining_time": "17m 30s"} +{"loss": 0.53447266, "grad_norm": 0.43138366, "learning_rate": 8.324e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.079054, "rewards/chosen": 9.39999962, "rewards/rejected": 0.22822265, "rewards/accuracies": 1.0, "rewards/margins": 9.14999962, "logps/chosen": -612.20001221, "logps/rejected": -616.0, "logits/chosen": -1.73125005, "logits/rejected": -1.77656245, "nll_loss": 0.53281248, "epoch": 0.92105263, "global_step/max_steps": "35/114", "percentage": "30.70%", "elapsed_time": "7m 18s", "remaining_time": "16m 29s"} +{"loss": 0.50688477, "grad_norm": 0.48999162, "learning_rate": 7.748e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.079664, "rewards/chosen": 10.25, "rewards/rejected": 0.07539062, "rewards/accuracies": 1.0, "rewards/margins": 10.17500019, "logps/chosen": -474.0, "logps/rejected": -646.40002441, "logits/chosen": -1.67812502, "logits/rejected": -1.75937498, "nll_loss": 0.56171876, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "8m 17s", "remaining_time": "15m 21s"} +{"eval_loss": 0.29125977, "eval_runtime": 4.3597, "eval_samples_per_second": 0.917, "eval_steps_per_second": 0.229, "eval_rewards/chosen": 10.5, "eval_rewards/rejected": 1.203125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.3125, "eval_logps/chosen": -135.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.75, "eval_logits/rejected": -1.3046875, "eval_nll_loss": 0.29101562, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "8m 22s", "remaining_time": "15m 29s"} +{"loss": 0.42540894, "grad_norm": 0.54743213, "learning_rate": 7.113e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.078852, "rewards/chosen": 11.27499962, "rewards/rejected": 0.04960937, "rewards/accuracies": 1.0, "rewards/margins": 11.25, "logps/chosen": -439.6000061, "logps/rejected": -661.20001221, "logits/chosen": -1.703125, "logits/rejected": 
-1.62031245, "nll_loss": 0.42539063, "epoch": 1.18421053, "global_step/max_steps": "45/114", "percentage": "39.47%", "elapsed_time": "9m 26s", "remaining_time": "14m 28s"} +{"loss": 0.47173767, "grad_norm": 0.47691383, "learning_rate": 6.434e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.079395, "rewards/chosen": 11.94999981, "rewards/rejected": 0.13237305, "rewards/accuracies": 1.0, "rewards/margins": 11.82499981, "logps/chosen": -488.79998779, "logps/rejected": -612.40002441, "logits/chosen": -1.78437495, "logits/rejected": -1.81875002, "nll_loss": 0.47109374, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "10m 25s", "remaining_time": "13m 20s"} +{"eval_loss": 0.28393555, "eval_runtime": 4.4202, "eval_samples_per_second": 0.905, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 10.875, "eval_rewards/rejected": 2.109375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.75, "eval_logps/chosen": -131.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.78125, "eval_logits/rejected": -1.3046875, "eval_nll_loss": 0.28320312, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "10m 30s", "remaining_time": "13m 26s"} +{"loss": 0.45333252, "grad_norm": 0.16456473, "learning_rate": 5.725e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.079928, "rewards/chosen": 12.80000019, "rewards/rejected": 0.51757812, "rewards/accuracies": 1.0, "rewards/margins": 12.28750038, "logps/chosen": -465.6000061, "logps/rejected": -570.40002441, "logits/chosen": -1.64999998, "logits/rejected": -1.73281252, "nll_loss": 0.45351562, "epoch": 1.44736842, "global_step/max_steps": "55/114", "percentage": "48.25%", "elapsed_time": "11m 23s", "remaining_time": "12m 13s"} +{"loss": 0.41478271, "grad_norm": 0.13988896, "learning_rate": 5e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080587, "rewards/chosen": 13.5625, "rewards/rejected": 0.39257812, "rewards/accuracies": 1.0, 
"rewards/margins": 13.16250038, "logps/chosen": -504.3999939, "logps/rejected": -441.6000061, "logits/chosen": -1.65625, "logits/rejected": -1.66718745, "nll_loss": 0.41445312, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "12m 20s", "remaining_time": "11m 6s"} +{"eval_loss": 0.28295898, "eval_runtime": 4.3947, "eval_samples_per_second": 0.91, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 11.0, "eval_rewards/rejected": 2.5, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.5, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1048.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.28125, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "12m 24s", "remaining_time": "11m 10s"} +{"loss": 0.40272827, "grad_norm": 0.2632741, "learning_rate": 4.275e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.08052, "rewards/chosen": 13.25, "rewards/rejected": 0.18845825, "rewards/accuracies": 1.0, "rewards/margins": 13.03750038, "logps/chosen": -445.3999939, "logps/rejected": -494.0, "logits/chosen": -1.6953125, "logits/rejected": -1.671875, "nll_loss": 0.40156251, "epoch": 1.71052632, "global_step/max_steps": "65/114", "percentage": "57.02%", "elapsed_time": "13m 23s", "remaining_time": "10m 5s"} +{"loss": 0.43348999, "grad_norm": 0.25444949, "learning_rate": 3.566e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080662, "rewards/chosen": 13.42500019, "rewards/rejected": -0.31850585, "rewards/accuracies": 1.0, "rewards/margins": 13.75, "logps/chosen": -397.3999939, "logps/rejected": -596.0, "logits/chosen": -1.6796875, "logits/rejected": -1.70156252, "nll_loss": 0.43281251, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "14m 23s", "remaining_time": "9m 2s"} +{"eval_loss": 0.27929688, "eval_runtime": 4.4096, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, 
"eval_rewards/chosen": 11.0625, "eval_rewards/rejected": 0.703125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.375, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.6953125, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.27929688, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "14m 28s", "remaining_time": "9m 5s"} +{"loss": 0.4626709, "grad_norm": 0.41078144, "learning_rate": 2.887e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080513, "rewards/chosen": 13.23750019, "rewards/rejected": -0.74140626, "rewards/accuracies": 1.0, "rewards/margins": 14.0, "logps/chosen": -498.3999939, "logps/rejected": -641.20001221, "logits/chosen": -1.62968755, "logits/rejected": -1.61249995, "nll_loss": 0.46250001, "epoch": 1.97368421, "global_step/max_steps": "75/114", "percentage": "65.79%", "elapsed_time": "15m 27s", "remaining_time": "8m 2s"} +{"loss": 0.45915527, "grad_norm": 0.36366139, "learning_rate": 2.252e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080112, "rewards/chosen": 13.10000038, "rewards/rejected": -1.13251948, "rewards/accuracies": 1.0, "rewards/margins": 14.23750019, "logps/chosen": -427.6000061, "logps/rejected": -590.79998779, "logits/chosen": -1.71249998, "logits/rejected": -1.64999998, "nll_loss": 0.47695312, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "16m 34s", "remaining_time": "7m 2s"} +{"eval_loss": 0.27905273, "eval_runtime": 4.3677, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.229, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 0.453125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.6875, "eval_logps/chosen": -129.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.6875, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.27929688, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "16m 38s", 
"remaining_time": "7m 4s"} +{"loss": 0.39433899, "grad_norm": 0.43974718, "learning_rate": 1.676e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080127, "rewards/chosen": 13.05000019, "rewards/rejected": -0.3185547, "rewards/accuracies": 1.0, "rewards/margins": 13.35000038, "logps/chosen": -380.3999939, "logps/rejected": -703.20001221, "logits/chosen": -1.61874998, "logits/rejected": -1.63125002, "nll_loss": 0.39414063, "epoch": 2.23684211, "global_step/max_steps": "85/114", "percentage": "74.56%", "elapsed_time": "17m 36s", "remaining_time": "6m 0s"} +{"loss": 0.42427979, "grad_norm": 0.21291019, "learning_rate": 1.17e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080807, "rewards/chosen": 14.63749981, "rewards/rejected": -0.31542969, "rewards/accuracies": 1.0, "rewards/margins": 14.94999981, "logps/chosen": -482.79998779, "logps/rejected": -551.59997559, "logits/chosen": -1.60312498, "logits/rejected": -1.63125002, "nll_loss": 0.42460936, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "18m 29s", "remaining_time": "4m 55s"} +{"eval_loss": 0.27758789, "eval_runtime": 4.3944, "eval_samples_per_second": 0.91, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 11.25, "eval_rewards/rejected": 0.453125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.75, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "18m 34s", "remaining_time": "4m 57s"} +{"loss": 0.44596558, "grad_norm": 0.1479467, "learning_rate": 7.44e-06, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080283, "rewards/chosen": 14.78750038, "rewards/rejected": -1.45468748, "rewards/accuracies": 1.0, "rewards/margins": 16.23749924, "logps/chosen": -510.79998779, "logps/rejected": -682.0, "logits/chosen": -1.67812502, "logits/rejected": 
-1.6640625, "nll_loss": 0.44570312, "epoch": 2.5, "global_step/max_steps": "95/114", "percentage": "83.33%", "elapsed_time": "19m 39s", "remaining_time": "3m 55s"} +{"loss": 0.39360352, "grad_norm": 0.26486421, "learning_rate": 4.09e-06, "memory(GiB)": 67.13, "train_speed(iter/s)": 0.080739, "rewards/chosen": 13.16250038, "rewards/rejected": -1.56718755, "rewards/accuracies": 1.0, "rewards/margins": 14.72500038, "logps/chosen": -397.3999939, "logps/rejected": -527.20001221, "logits/chosen": -1.6328125, "logits/rejected": -1.6328125, "nll_loss": 0.39355469, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "20m 34s", "remaining_time": "2m 52s"} +{"eval_loss": 0.27783203, "eval_runtime": 4.4191, "eval_samples_per_second": 0.905, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 11.25, "eval_rewards/rejected": 0.453125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.8125, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "20m 38s", "remaining_time": "2m 53s"} +{"loss": 0.41902771, "grad_norm": 0.41604752, "learning_rate": 1.7e-06, "memory(GiB)": 67.13, "train_speed(iter/s)": 0.08056, "rewards/chosen": 13.8125, "rewards/rejected": -0.92773438, "rewards/accuracies": 1.0, "rewards/margins": 14.72500038, "logps/chosen": -428.8500061, "logps/rejected": -531.59997559, "logits/chosen": -1.62812495, "logits/rejected": -1.70156252, "nll_loss": 0.41953126, "epoch": 2.76315789, "global_step/max_steps": "105/114", "percentage": "92.11%", "elapsed_time": "21m 39s", "remaining_time": "1m 51s"} +{"loss": 0.37988281, "grad_norm": 0.29656543, "learning_rate": 3.4e-07, "memory(GiB)": 67.13, "train_speed(iter/s)": 0.080742, "rewards/chosen": 14.4375, "rewards/rejected": -1.43124998, "rewards/accuracies": 1.0, 
"rewards/margins": 15.86250019, "logps/chosen": -446.0, "logps/rejected": -573.79998779, "logits/chosen": -1.66718745, "logits/rejected": -1.6875, "nll_loss": 0.37988281, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "22m 38s", "remaining_time": "49s"} +{"eval_loss": 0.27758789, "eval_runtime": 4.4168, "eval_samples_per_second": 0.906, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 11.25, "eval_rewards/rejected": 0.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.875, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "22m 42s", "remaining_time": "49s"} +{"eval_loss": 0.27734375, "eval_runtime": 4.3362, "eval_samples_per_second": 0.922, "eval_steps_per_second": 0.231, "eval_rewards/chosen": 11.1875, "eval_rewards/rejected": 0.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.875, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "23m 42s", "remaining_time": "0s"} +{"train_runtime": 1425.3359, "train_samples_per_second": 0.627, "train_steps_per_second": 0.08, "total_flos": 479640253628416.0, "train_loss": 0.61382709, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "23m 45s", "remaining_time": "0s"} +{"train_dataset": "1698.815436±897.000106, min=182.000000, max=4081.000000, size=298", "val_dataset": "1637.250000±797.581461, min=755.000000, max=2485.000000, size=4", "model_parameter_info": "PeftModelForCausalLM: 32830.9852M Params (67.1089M Trainable [0.2044%]), 0.0001M Buffers.", "last_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114", "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/checkpoint-114", "best_metric": 0.27734375, "global_step": 114, "log_history": [{"loss": 1.2783203125, "grad_norm": 4.767905865350689, "learning_rate": 1.6666666666666667e-05, "memory(GiB)": 14.31, "train_speed(iter/s)": 0.059163, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -664.0, "logps/rejected": -370.0, "logits/chosen": -1.609375, "logits/rejected": -1.609375, "nll_loss": 0.5859375, "epoch": 0.02631578947368421, "step": 1}, {"loss": 1.839111328125, "grad_norm": 5.720924346118727, "learning_rate": 8.333333333333334e-05, "memory(GiB)": 24.25, "train_speed(iter/s)": 0.080606, "rewards/chosen": 0.082916259765625, "rewards/rejected": -0.0110015869140625, "rewards/accuracies": 0.34375, "rewards/margins": 0.09368896484375, "logps/chosen": -676.25, "logps/rejected": -513.5, "logits/chosen": -1.57421875, "logits/rejected": -1.640625, "nll_loss": 1.1826171875, "epoch": 0.13157894736842105, "step": 5}, {"loss": 1.83935546875, "grad_norm": 4.536016569461848, "learning_rate": 9.966191788709716e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.078543, "rewards/chosen": 0.6597656011581421, "rewards/rejected": 0.20273438096046448, "rewards/accuracies": 0.7250000238418579, "rewards/margins": 0.4574218690395355, "logps/chosen": -572.0, "logps/rejected": -645.5999755859375, "logits/chosen": -1.5046875476837158, "logits/rejected": -1.5343749523162842, "nll_loss": 1.3250000476837158, "epoch": 0.2631578947368421, "step": 10}, {"eval_loss": 0.77734375, "eval_runtime": 4.3928, "eval_samples_per_second": 0.911, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 2.75, "eval_rewards/rejected": 1.3515625, 
"eval_rewards/accuracies": 1.0, "eval_rewards/margins": 1.3984375, "eval_logps/chosen": -213.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.6171875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.5546875, "epoch": 0.2631578947368421, "step": 10}, {"loss": 1.06064453125, "grad_norm": 2.6231857601470785, "learning_rate": 9.829629131445342e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.07979, "rewards/chosen": 3.4437499046325684, "rewards/rejected": 1.5187499523162842, "rewards/accuracies": 0.8999999761581421, "rewards/margins": 1.9249999523162842, "logps/chosen": -600.0, "logps/rejected": -515.5999755859375, "logits/chosen": -1.578125, "logits/rejected": -1.618749976158142, "nll_loss": 0.7710937261581421, "epoch": 0.39473684210526316, "step": 15}, {"loss": 0.7858154296875, "grad_norm": 0.7033851800529384, "learning_rate": 9.591080534401371e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.080777, "rewards/chosen": 6.862500190734863, "rewards/rejected": 3.6031250953674316, "rewards/accuracies": 0.9750000238418579, "rewards/margins": 3.2562499046325684, "logps/chosen": -374.6000061035156, "logps/rejected": -527.5999755859375, "logits/chosen": -1.649999976158142, "logits/rejected": -1.623437523841858, "nll_loss": 0.673046886920929, "epoch": 0.5263157894736842, "step": 20}, {"eval_loss": 0.3515625, "eval_runtime": 4.3823, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 9.25, "eval_rewards/rejected": 4.84375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 4.375, "eval_logps/chosen": -148.0, "eval_logps/rejected": -1024.0, "eval_logits/chosen": -1.6875, "eval_logits/rejected": -1.2890625, "eval_nll_loss": 0.32421875, "epoch": 0.5263157894736842, "step": 20}, {"loss": 0.5309295654296875, "grad_norm": 1.1907563826066778, "learning_rate": 9.255583362184999e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.07974, "rewards/chosen": 7.918749809265137, "rewards/rejected": 1.191796898841858, 
"rewards/accuracies": 1.0, "rewards/margins": 6.724999904632568, "logps/chosen": -543.5999755859375, "logps/rejected": -643.7999877929688, "logits/chosen": -1.6593749523162842, "logits/rejected": -1.6203124523162842, "nll_loss": 0.516406238079071, "epoch": 0.6578947368421053, "step": 25}, {"loss": 0.47745361328125, "grad_norm": 0.8601038031625003, "learning_rate": 8.83022221559489e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.080007, "rewards/chosen": 8.5625, "rewards/rejected": -0.774609386920929, "rewards/accuracies": 1.0, "rewards/margins": 9.362500190734863, "logps/chosen": -400.79998779296875, "logps/rejected": -584.4000244140625, "logits/chosen": -1.717187523841858, "logits/rejected": -1.6671874523162842, "nll_loss": 0.47734373807907104, "epoch": 0.7894736842105263, "step": 30}, {"eval_loss": 0.3046875, "eval_runtime": 4.4114, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 9.9375, "eval_rewards/rejected": 0.25, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.6875, "eval_logps/chosen": -141.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.734375, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.3046875, "epoch": 0.7894736842105263, "step": 30}, {"loss": 0.53447265625, "grad_norm": 0.43138365725560973, "learning_rate": 8.323979328069689e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.079054, "rewards/chosen": 9.399999618530273, "rewards/rejected": 0.22822265326976776, "rewards/accuracies": 1.0, "rewards/margins": 9.149999618530273, "logps/chosen": -612.2000122070312, "logps/rejected": -616.0, "logits/chosen": -1.7312500476837158, "logits/rejected": -1.7765624523162842, "nll_loss": 0.5328124761581421, "epoch": 0.9210526315789473, "step": 35}, {"loss": 0.506884765625, "grad_norm": 0.4899916200761263, "learning_rate": 7.74754489035403e-05, "memory(GiB)": 56.43, "train_speed(iter/s)": 0.079664, "rewards/chosen": 10.25, "rewards/rejected": 0.07539062201976776, "rewards/accuracies": 1.0, 
"rewards/margins": 10.175000190734863, "logps/chosen": -474.0, "logps/rejected": -646.4000244140625, "logits/chosen": -1.678125023841858, "logits/rejected": -1.759374976158142, "nll_loss": 0.561718761920929, "epoch": 1.0526315789473684, "step": 40}, {"eval_loss": 0.291259765625, "eval_runtime": 4.3597, "eval_samples_per_second": 0.917, "eval_steps_per_second": 0.229, "eval_rewards/chosen": 10.5, "eval_rewards/rejected": 1.203125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 9.3125, "eval_logps/chosen": -135.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.75, "eval_logits/rejected": -1.3046875, "eval_nll_loss": 0.291015625, "epoch": 1.0526315789473684, "step": 40}, {"loss": 0.425408935546875, "grad_norm": 0.5474321289483255, "learning_rate": 7.113091308703498e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.078852, "rewards/chosen": 11.274999618530273, "rewards/rejected": 0.04960937425494194, "rewards/accuracies": 1.0, "rewards/margins": 11.25, "logps/chosen": -439.6000061035156, "logps/rejected": -661.2000122070312, "logits/chosen": -1.703125, "logits/rejected": -1.6203124523162842, "nll_loss": 0.4253906309604645, "epoch": 1.1842105263157894, "step": 45}, {"loss": 0.4717376708984375, "grad_norm": 0.4769138301102208, "learning_rate": 6.434016163555452e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.079395, "rewards/chosen": 11.949999809265137, "rewards/rejected": 0.13237304985523224, "rewards/accuracies": 1.0, "rewards/margins": 11.824999809265137, "logps/chosen": -488.79998779296875, "logps/rejected": -612.4000244140625, "logits/chosen": -1.7843749523162842, "logits/rejected": -1.818750023841858, "nll_loss": 0.4710937440395355, "epoch": 1.3157894736842106, "step": 50}, {"eval_loss": 0.283935546875, "eval_runtime": 4.4202, "eval_samples_per_second": 0.905, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 10.875, "eval_rewards/rejected": 2.109375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.75, "eval_logps/chosen": 
-131.0, "eval_logps/rejected": -1056.0, "eval_logits/chosen": -1.78125, "eval_logits/rejected": -1.3046875, "eval_nll_loss": 0.283203125, "epoch": 1.3157894736842106, "step": 50}, {"loss": 0.45333251953125, "grad_norm": 0.16456472919965845, "learning_rate": 5.724659296536233e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.079928, "rewards/chosen": 12.800000190734863, "rewards/rejected": 0.517578125, "rewards/accuracies": 1.0, "rewards/margins": 12.287500381469727, "logps/chosen": -465.6000061035156, "logps/rejected": -570.4000244140625, "logits/chosen": -1.649999976158142, "logits/rejected": -1.732812523841858, "nll_loss": 0.4535156190395355, "epoch": 1.4473684210526316, "step": 55}, {"loss": 0.41478271484375, "grad_norm": 0.13988896145691967, "learning_rate": 5e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080587, "rewards/chosen": 13.5625, "rewards/rejected": 0.392578125, "rewards/accuracies": 1.0, "rewards/margins": 13.162500381469727, "logps/chosen": -504.3999938964844, "logps/rejected": -441.6000061035156, "logits/chosen": -1.65625, "logits/rejected": -1.6671874523162842, "nll_loss": 0.4144531190395355, "epoch": 1.5789473684210527, "step": 60}, {"eval_loss": 0.282958984375, "eval_runtime": 4.3947, "eval_samples_per_second": 0.91, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 11.0, "eval_rewards/rejected": 2.5, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 8.5, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1048.0, "eval_logits/chosen": -1.7265625, "eval_logits/rejected": -1.296875, "eval_nll_loss": 0.28125, "epoch": 1.5789473684210527, "step": 60}, {"loss": 0.402728271484375, "grad_norm": 0.26327409929500534, "learning_rate": 4.275340703463767e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.08052, "rewards/chosen": 13.25, "rewards/rejected": 0.18845824897289276, "rewards/accuracies": 1.0, "rewards/margins": 13.037500381469727, "logps/chosen": -445.3999938964844, "logps/rejected": -494.0, "logits/chosen": -1.6953125, 
"logits/rejected": -1.671875, "nll_loss": 0.40156251192092896, "epoch": 1.7105263157894737, "step": 65}, {"loss": 0.433489990234375, "grad_norm": 0.2544494877535854, "learning_rate": 3.5659838364445505e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080662, "rewards/chosen": 13.425000190734863, "rewards/rejected": -0.3185058534145355, "rewards/accuracies": 1.0, "rewards/margins": 13.75, "logps/chosen": -397.3999938964844, "logps/rejected": -596.0, "logits/chosen": -1.6796875, "logits/rejected": -1.701562523841858, "nll_loss": 0.43281251192092896, "epoch": 1.8421052631578947, "step": 70}, {"eval_loss": 0.279296875, "eval_runtime": 4.4096, "eval_samples_per_second": 0.907, "eval_steps_per_second": 0.227, "eval_rewards/chosen": 11.0625, "eval_rewards/rejected": 0.703125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.375, "eval_logps/chosen": -130.0, "eval_logps/rejected": -1064.0, "eval_logits/chosen": -1.6953125, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.279296875, "epoch": 1.8421052631578947, "step": 70}, {"loss": 0.4626708984375, "grad_norm": 0.4107814395722938, "learning_rate": 2.886908691296504e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080513, "rewards/chosen": 13.237500190734863, "rewards/rejected": -0.741406261920929, "rewards/accuracies": 1.0, "rewards/margins": 14.0, "logps/chosen": -498.3999938964844, "logps/rejected": -641.2000122070312, "logits/chosen": -1.6296875476837158, "logits/rejected": -1.6124999523162842, "nll_loss": 0.4625000059604645, "epoch": 1.973684210526316, "step": 75}, {"loss": 0.4591552734375, "grad_norm": 0.3636613929853374, "learning_rate": 2.25245510964597e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080112, "rewards/chosen": 13.100000381469727, "rewards/rejected": -1.1325194835662842, "rewards/accuracies": 1.0, "rewards/margins": 14.237500190734863, "logps/chosen": -427.6000061035156, "logps/rejected": -590.7999877929688, "logits/chosen": -1.712499976158142, "logits/rejected": 
-1.649999976158142, "nll_loss": 0.4769531190395355, "epoch": 2.1052631578947367, "step": 80}, {"eval_loss": 0.279052734375, "eval_runtime": 4.3677, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.229, "eval_rewards/chosen": 11.125, "eval_rewards/rejected": 0.453125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.6875, "eval_logps/chosen": -129.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.6875, "eval_logits/rejected": -1.28125, "eval_nll_loss": 0.279296875, "epoch": 2.1052631578947367, "step": 80}, {"loss": 0.3943389892578125, "grad_norm": 0.43974717734245733, "learning_rate": 1.6760206719303105e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080127, "rewards/chosen": 13.050000190734863, "rewards/rejected": -0.31855469942092896, "rewards/accuracies": 1.0, "rewards/margins": 13.350000381469727, "logps/chosen": -380.3999938964844, "logps/rejected": -703.2000122070312, "logits/chosen": -1.618749976158142, "logits/rejected": -1.631250023841858, "nll_loss": 0.3941406309604645, "epoch": 2.236842105263158, "step": 85}, {"loss": 0.42427978515625, "grad_norm": 0.21291018894177471, "learning_rate": 1.1697777844051105e-05, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080807, "rewards/chosen": 14.637499809265137, "rewards/rejected": -0.3154296875, "rewards/accuracies": 1.0, "rewards/margins": 14.949999809265137, "logps/chosen": -482.79998779296875, "logps/rejected": -551.5999755859375, "logits/chosen": -1.603124976158142, "logits/rejected": -1.631250023841858, "nll_loss": 0.42460936307907104, "epoch": 2.3684210526315788, "step": 90}, {"eval_loss": 0.277587890625, "eval_runtime": 4.3944, "eval_samples_per_second": 0.91, "eval_steps_per_second": 0.228, "eval_rewards/chosen": 11.25, "eval_rewards/rejected": 0.453125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.75, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, 
"epoch": 2.3684210526315788, "step": 90}, {"loss": 0.445965576171875, "grad_norm": 0.14794669988653775, "learning_rate": 7.444166378150013e-06, "memory(GiB)": 57.88, "train_speed(iter/s)": 0.080283, "rewards/chosen": 14.787500381469727, "rewards/rejected": -1.454687476158142, "rewards/accuracies": 1.0, "rewards/margins": 16.237499237060547, "logps/chosen": -510.79998779296875, "logps/rejected": -682.0, "logits/chosen": -1.678125023841858, "logits/rejected": -1.6640625, "nll_loss": 0.4457031190395355, "epoch": 2.5, "step": 95}, {"loss": 0.393603515625, "grad_norm": 0.26486420620313644, "learning_rate": 4.089194655986306e-06, "memory(GiB)": 67.13, "train_speed(iter/s)": 0.080739, "rewards/chosen": 13.162500381469727, "rewards/rejected": -1.5671875476837158, "rewards/accuracies": 1.0, "rewards/margins": 14.725000381469727, "logps/chosen": -397.3999938964844, "logps/rejected": -527.2000122070312, "logits/chosen": -1.6328125, "logits/rejected": -1.6328125, "nll_loss": 0.3935546875, "epoch": 2.6315789473684212, "step": 100}, {"eval_loss": 0.27783203125, "eval_runtime": 4.4191, "eval_samples_per_second": 0.905, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 11.25, "eval_rewards/rejected": 0.453125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.8125, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 2.6315789473684212, "step": 100}, {"loss": 0.4190277099609375, "grad_norm": 0.41604751594338824, "learning_rate": 1.70370868554659e-06, "memory(GiB)": 67.13, "train_speed(iter/s)": 0.08056, "rewards/chosen": 13.8125, "rewards/rejected": -0.927734375, "rewards/accuracies": 1.0, "rewards/margins": 14.725000381469727, "logps/chosen": -428.8500061035156, "logps/rejected": -531.5999755859375, "logits/chosen": -1.6281249523162842, "logits/rejected": -1.701562523841858, "nll_loss": 0.4195312559604645, "epoch": 2.763157894736842, "step": 105}, 
{"loss": 0.3798828125, "grad_norm": 0.2965654266316545, "learning_rate": 3.380821129028489e-07, "memory(GiB)": 67.13, "train_speed(iter/s)": 0.080742, "rewards/chosen": 14.4375, "rewards/rejected": -1.431249976158142, "rewards/accuracies": 1.0, "rewards/margins": 15.862500190734863, "logps/chosen": -446.0, "logps/rejected": -573.7999877929688, "logits/chosen": -1.6671874523162842, "logits/rejected": -1.6875, "nll_loss": 0.3798828125, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.277587890625, "eval_runtime": 4.4168, "eval_samples_per_second": 0.906, "eval_steps_per_second": 0.226, "eval_rewards/chosen": 11.25, "eval_rewards/rejected": 0.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.875, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.27734375, "eval_runtime": 4.3362, "eval_samples_per_second": 0.922, "eval_steps_per_second": 0.231, "eval_rewards/chosen": 11.1875, "eval_rewards/rejected": 0.3515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.875, "eval_logps/chosen": -128.0, "eval_logps/rejected": -1072.0, "eval_logits/chosen": -1.703125, "eval_logits/rejected": -1.2734375, "eval_nll_loss": 0.27734375, "epoch": 3.0, "step": 114}, {"train_runtime": 1425.3359, "train_samples_per_second": 0.627, "train_steps_per_second": 0.08, "total_flos": 479640253628416.0, "train_loss": 0.6138270863315516, "epoch": 3.0, "step": 114}], "memory": 67.126953125} diff --git a/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs/events.out.tfevents.1739619087.kml-task-540432-record-10144729-prod-worker-0.171.0 b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs/events.out.tfevents.1739619087.kml-task-540432-record-10144729-prod-worker-0.171.0 new file mode 100644 index 
0000000000000000000000000000000000000000..7cef05c5347bc4edecad795d669db6b2a5709e02 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-32b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-112700/runs/events.out.tfevents.1739619087.kml-task-540432-record-10144729-prod-worker-0.171.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31cf01c493ab6343f723062aa162b5d76183a5648a4fa0799b4df1ce8860d35 +size 36869 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/args.json new file mode 100644 index 0000000000000000000000000000000000000000..eb87e3299ce19de9038ae76677ec973ce81e79a6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, 
+ "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 
10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + 
"train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + 
"target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + 
"reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', 
hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + "training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, 
use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, 
group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', 
sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ed9b24360267b4070cbf2486dcfb95a799d65be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "up_proj", + "k_proj", + "v_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..682d9a9e63211548ffac5327b898372b089be9b2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4d6bec744526fde7081695504d73e28ed6a70a70eaff944ea8c68b8eaee457 +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..eb87e3299ce19de9038ae76677ec973ce81e79a6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..72cbf685a57c8895718c40075a21d2c393f885a5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b07c01f69755f5d22524991d8bafdcf840a20dc36f71b7b4f07d119c40f54ff2 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9eb2849c6bc5b11933efb9a2ce762dd3f9a6fdb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a51627198c15eebcff9b5d7b6731b04800877a04997fb7f75c2f914436c0b6 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6f06160bc5b896876d8bc16df07e88bab112ea0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0c52bc7533f96e3bcfa575509205758760101a1f400420b4a21bcc7c2227f04b +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b188cbad6708c782a7a274ff62c54918f5c9c80 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592274e3561ac05b55616f034ff0bc9f6611d0764d9958013c4d370461ffd676 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16e862ebd0008dfa78caae8a898753a4fe84d7b8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc71584a13e2b45af9163ae9e1984eb2ae72fdb5b006335d3bbfb39efc1d4a3 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6f3d0e7b9586dea7e2cc34f8f096263c1e08af3 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b8670f71b631d6e628a282a45a9aa0bf42547f162ee5d9ef9a02cb1a861492 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..87b495b8d6caad121257ecad99a57a0e148a9dac --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd5c250af67cc54b5d1b1cc64f2a7593e95da9f5871985e4b24500305f73e22 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..33a7525d85d4e5b897d495c9142fdaff9fe1b62c --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:805913ea39b5f7bebdb2910b7995ce59ca092385fada25876208f190bfb1029b +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..51a824dd36aeeb461bcf24fdf65384617685ab58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fc52c4a93583d9ad1a3836a7962f19034a9214ea700213400e4e221d387ebf +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3030ad106e287244bdff00e3a480f834957bf98a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2805eac185e17dd4b95e30fc04f67d72aa81a6d73880fe5e9379cef1e5eb6849 +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c64c71e0dbfaaf1d7d1180fb635af37c4883ce5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7025a7b98bdc6b48906710b0f90ff63b74c7dcffd11ccf5d3339ab5055d27a +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f307c60d54d063683f5211762ab54a9926dee0ad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9021c4d4ef3a89baee0711f5c7a98e7594780638178c4911da6b76c1b3d6ad58 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..9f17ea040e9ab6a061988521ac29614f857dbab5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f593d1f818e92016a3064904de1ca8d3c95a428a1c461d70d2b3707cc0bc08cd +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b5f8c06fd177293874596e9c2ac0303e0a197ce --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1bbf993d78c31b2e89172cc59291164bae77a5a9420476e18675c7cedc0ec6 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f56a955eeaeebf1b6c1cb4ab4150de631f41dc19 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3c72726d5c73af4fdcb1da59a45a3e232a4469863c1f548f8634359802bf5015 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97a4a5ca4846ec856e77100a09835cc006430632 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f503458dbdd0f650b050f90804c781b0da4033f3af162b716d3b25fa8c0dac7f +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/latest new file mode 100644 index 0000000000000000000000000000000000000000..744ae7dbad571b6f37ec6c7066549494261bb59e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/latest @@ -0,0 +1 @@ +global_step100 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..97f51b498d48145bd9cc14b35f8236b9ec95a4f7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08e59ac81067b262a084604cd3392250166c2841 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..20a24c17b4be2ee59cd5e6682010519318a91e58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..54050f6cf8fb847e2a926e14a7aad2647761521a --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..263aae475c49b090bce43f143308192c5bf9a95b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..942ed5d60ae87dce686b33da76a34db404036dc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..57789be3df3983cb8acc1500bf6470ffadb1c578 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32d6e2e7eb7148713b473b0c821a98e616ab6e6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a1fb08c48e9d34df783eb19e7c9d1caf0ed386 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec37c3a15b8d061312402391f2fddb52d623a1416d6d2879a30f184450d844f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0eb0c8711b9dbddb570eae94d537d3bbb0d5e7fc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/trainer_state.json @@ -0,0 +1,581 @@ +{ + "best_metric": 0.40600586, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90", + "epoch": 2.6315789473684212, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.683995568480029, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.128143 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.204940567356727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.12890625, + "logits/rejected": 0.035888671875, + "logps/chosen": -377.25, + "logps/rejected": -512.5, + "loss": 1.8707275390625, + "memory(GiB)": 16.45, + "nll_loss": 1.1767578125, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.00313568115234375, + "rewards/margins": 0.01727294921875, + "rewards/rejected": -0.0140533447265625, + "step": 5, + "train_speed(iter/s)": 0.245862 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 13.261017953236665, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07431640475988388, + "logits/rejected": 0.19111327826976776, + 
"logps/chosen": -464.0, + "logps/rejected": -506.0, + "loss": 2.1458984375, + "memory(GiB)": 40.42, + "nll_loss": 1.5578124523162842, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.6318359375, + "rewards/margins": 0.3611083924770355, + "rewards/rejected": 0.27019041776657104, + "step": 10, + "train_speed(iter/s)": 0.261302 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.515625, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -362.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.279296875, + "eval_nll_loss": 0.80078125, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.203125, + "eval_rewards/margins": 1.2734375, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3679, + "eval_samples_per_second": 2.924, + "eval_steps_per_second": 0.731, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 7.875469545265156, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.28095704317092896, + "logits/rejected": 0.11831054836511612, + "logps/chosen": -354.0, + "logps/rejected": -488.0, + "loss": 1.00537109375, + "memory(GiB)": 40.42, + "nll_loss": 0.734375, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.2828125953674316, + "rewards/margins": 2.1429686546325684, + "rewards/rejected": 1.142187476158142, + "step": 15, + "train_speed(iter/s)": 0.270976 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.3424016920629582, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8218749761581421, + "logits/rejected": 0.4154296815395355, + "logps/chosen": -259.0, + "logps/rejected": -378.3999938964844, + "loss": 1.1302978515625, + "memory(GiB)": 40.42, + "nll_loss": 0.93359375, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.75, + "rewards/margins": 3.28125, + "rewards/rejected": 3.465625047683716, + "step": 20, + "train_speed(iter/s)": 0.278471 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 
1.1015625, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.66650390625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.875, + "eval_rewards/rejected": 4.8125, + "eval_runtime": 1.3337, + "eval_samples_per_second": 2.999, + "eval_steps_per_second": 0.75, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.8405582693629725, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.37519532442092896, + "logits/rejected": 0.10800781100988388, + "logps/chosen": -373.0, + "logps/rejected": -422.6000061035156, + "loss": 0.659814453125, + "memory(GiB)": 40.42, + "nll_loss": 0.599609375, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.087499618530273, + "rewards/margins": 4.443749904632568, + "rewards/rejected": 4.640625, + "step": 25, + "train_speed(iter/s)": 0.276451 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9876609542956052, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5821288824081421, + "logits/rejected": 0.47343748807907104, + "logps/chosen": -250.1999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.5025634765625, + "memory(GiB)": 40.42, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.962499618530273, + "rewards/margins": 6.112500190734863, + "rewards/rejected": 4.853125095367432, + "step": 30, + "train_speed(iter/s)": 0.279399 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.09375, + "eval_logits/rejected": 1.21875, + "eval_logps/chosen": -270.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49560546875, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.4375, + "eval_rewards/margins": 7.65625, + "eval_rewards/rejected": 3.75, + "eval_runtime": 1.3068, + "eval_samples_per_second": 3.061, + "eval_steps_per_second": 0.765, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + 
"grad_norm": 1.1648359643743196, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.4947265684604645, + "logits/rejected": 0.33867186307907104, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.49095458984375, + "memory(GiB)": 40.42, + "nll_loss": 0.48828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.987500190734863, + "rewards/margins": 8.4375, + "rewards/rejected": 3.549999952316284, + "step": 35, + "train_speed(iter/s)": 0.276953 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0600846626477478, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.2225341796875, + "logits/rejected": 0.572460949420929, + "logps/chosen": -272.20001220703125, + "logps/rejected": -484.0, + "loss": 0.457373046875, + "memory(GiB)": 40.42, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.912500381469727, + "rewards/margins": 9.862500190734863, + "rewards/rejected": 2.0640625953674316, + "step": 40, + "train_speed(iter/s)": 0.279857 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": 1.6953125, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.447021484375, + "eval_nll_loss": 0.447265625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.046875, + "eval_runtime": 1.3537, + "eval_samples_per_second": 2.955, + "eval_steps_per_second": 0.739, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6410222742542526, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.21367187798023224, + "logits/rejected": 0.8939453363418579, + "logps/chosen": -306.6000061035156, + "logps/rejected": -481.20001220703125, + "loss": 0.44505615234375, + "memory(GiB)": 40.42, + "nll_loss": 0.4449218809604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.012499809265137, + "rewards/margins": 12.600000381469727, + 
"rewards/rejected": 1.421875, + "step": 45, + "train_speed(iter/s)": 0.277425 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5485582252831179, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.10859374701976776, + "logits/rejected": 1.1062500476837158, + "logps/chosen": -300.3999938964844, + "logps/rejected": -463.6000061035156, + "loss": 0.4327880859375, + "memory(GiB)": 40.42, + "nll_loss": 0.43242186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.949999809265137, + "rewards/rejected": 1.8603515625, + "step": 50, + "train_speed(iter/s)": 0.280134 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5078125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -243.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.424072265625, + "eval_nll_loss": 0.423828125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.8515625, + "eval_runtime": 1.2934, + "eval_samples_per_second": 3.093, + "eval_steps_per_second": 0.773, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2883090056174341, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1586044281721115, + "logits/rejected": 1.302343726158142, + "logps/chosen": -260.3999938964844, + "logps/rejected": -433.20001220703125, + "loss": 0.40018310546875, + "memory(GiB)": 40.42, + "nll_loss": 0.4000000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.399999618530273, + "rewards/margins": 12.8125, + "rewards/rejected": 1.5867187976837158, + "step": 55, + "train_speed(iter/s)": 0.281604 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.23683344653551686, + "learning_rate": 5e-05, + "logits/chosen": -0.05322265625, + "logits/rejected": 0.7669922113418579, + "logps/chosen": -263.3999938964844, + "logps/rejected": -406.0, + "loss": 0.2842041015625, + "memory(GiB)": 40.42, + "nll_loss": 0.2835937440395355, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 15.362500190734863, + "rewards/margins": 12.899999618530273, + "rewards/rejected": 2.457812547683716, + "step": 60, + "train_speed(iter/s)": 0.284501 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.359375, + "eval_logits/rejected": 1.984375, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.42578125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.97265625, + "eval_runtime": 1.3344, + "eval_samples_per_second": 2.998, + "eval_steps_per_second": 0.749, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6109275312536815, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.49492186307907104, + "logits/rejected": 1.047949194908142, + "logps/chosen": -261.3999938964844, + "logps/rejected": -409.6000061035156, + "loss": 0.33233642578125, + "memory(GiB)": 40.42, + "nll_loss": 0.3326171934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.75, + "rewards/margins": 13.975000381469727, + "rewards/rejected": 1.7880859375, + "step": 65, + "train_speed(iter/s)": 0.2839 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.4456195770344251, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.04169921949505806, + "logits/rejected": 1.0632812976837158, + "logps/chosen": -193.0, + "logps/rejected": -502.0, + "loss": 0.32952423095703126, + "memory(GiB)": 40.42, + "nll_loss": 0.32929688692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.350000381469727, + "rewards/rejected": 2.1187500953674316, + "step": 70, + "train_speed(iter/s)": 0.284411 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.28125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -236.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.412841796875, + "eval_nll_loss": 
0.412109375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.875, + "eval_rewards/margins": 14.125, + "eval_rewards/rejected": 0.7265625, + "eval_runtime": 1.3284, + "eval_samples_per_second": 3.011, + "eval_steps_per_second": 0.753, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.8365310785663989, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.4095703065395355, + "logits/rejected": 1.3039062023162842, + "logps/chosen": -297.6000061035156, + "logps/rejected": -457.20001220703125, + "loss": 0.40308837890625, + "memory(GiB)": 40.42, + "nll_loss": 0.4029296934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.850000381469727, + "rewards/margins": 15.212499618530273, + "rewards/rejected": 0.65625, + "step": 75, + "train_speed(iter/s)": 0.283895 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5507859400228646, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.17363281548023224, + "logits/rejected": 1.2405273914337158, + "logps/chosen": -238.60000610351562, + "logps/rejected": -438.0, + "loss": 0.364111328125, + "memory(GiB)": 40.42, + "nll_loss": 0.3833984434604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.324999809265137, + "rewards/margins": 14.100000381469727, + "rewards/rejected": 1.255468726158142, + "step": 80, + "train_speed(iter/s)": 0.28289 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.25, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.375, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3449, + "eval_samples_per_second": 2.974, + "eval_steps_per_second": 0.744, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5879222859445165, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2831054627895355, 
+ "logits/rejected": 1.407812476158142, + "logps/chosen": -250.1999969482422, + "logps/rejected": -474.0, + "loss": 0.3390655517578125, + "memory(GiB)": 40.42, + "nll_loss": 0.33906251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 15.475000381469727, + "rewards/rejected": 0.739697277545929, + "step": 85, + "train_speed(iter/s)": 0.282808 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.28403515939320456, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35429686307907104, + "logits/rejected": 0.766406238079071, + "logps/chosen": -254.39999389648438, + "logps/rejected": -423.20001220703125, + "loss": 0.3229835510253906, + "memory(GiB)": 40.42, + "nll_loss": 0.32304686307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.274999618530273, + "rewards/margins": 14.462499618530273, + "rewards/rejected": 1.8312499523162842, + "step": 90, + "train_speed(iter/s)": 0.285525 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.2265625, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.406005859375, + "eval_nll_loss": 0.40625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3392, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.2494345039535932, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -0.19296875596046448, + "logits/rejected": 1.056249976158142, + "logps/chosen": -312.3999938964844, + "logps/rejected": -528.7999877929688, + "loss": 0.41544189453125, + "memory(GiB)": 40.42, + "nll_loss": 0.41523438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.424999237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.644238293170929, + "step": 95, + "train_speed(iter/s)": 0.283981 + }, + 
{ + "epoch": 2.6315789473684212, + "grad_norm": 0.45907392711831513, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -0.5240234136581421, + "logits/rejected": 1.3953125476837158, + "logps/chosen": -245.89999389648438, + "logps/rejected": -407.20001220703125, + "loss": 0.351611328125, + "memory(GiB)": 49.67, + "nll_loss": 0.3515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.450000762939453, + "rewards/margins": 15.574999809265137, + "rewards/rejected": 0.8648437261581421, + "step": 100, + "train_speed(iter/s)": 0.285707 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.21875, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.407470703125, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": 0.4765625, + "eval_runtime": 1.3287, + "eval_samples_per_second": 3.01, + "eval_steps_per_second": 0.753, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 127458152022016.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6eed2c4f454bc0afdc4915e40e9435f5f68cf2a4 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777f6ba228776bfdbc6a3dde531a341ee883fdf24c50e69b6fb575f29b1c0e77 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-100/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + 
+[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ed9b24360267b4070cbf2486dcfb95a799d65be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "up_proj", + "k_proj", + "v_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d11fb1a038bbfa47db0d3c339e7f621fae4fbb4c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb130a0d68c1740abcc2bb88c47f6019d73fc0ebeb4d5b07590200f9cb7521d +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..eb87e3299ce19de9038ae76677ec973ce81e79a6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b4c572aa5b0a39a71f7d5ea1f03749f2382fd91a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e946c823291aaa3fa8a288a9b9af0aafb1894f41b4d9852b02b3363aea5edd3 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f944381554c16f7ad02a9852d512514c1f6f7ea --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:813b23ebd4671c6c06a928a3ae6a5dae969bacd857c76277dff26a8505a09fef +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08e050f93028612362690eae0a6a7f51575185a0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:32c63b28f9291e061043b03d716295713398cfcbf77bb9b78d8645fb75e16c61 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9376ba0a6564c8689b71ee2f4526fce89dadda99 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e569e2fbe9cca752a07cb3bc08fcc50440972f3b43d46a589cd1740d440815c6 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cf9aa10a875a20453375873660284179aea3ded --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247ee9e6af0d073f4d8dd30cd86d7a1bc0e96a62df2a4d0af3a6aee31d4c49ea +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a757fc822d9290fd11ef8086af3b4255e0760871 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ce56b6996a544e09b57f19fd949850b74b0e17e912ad2e4e95b5ae1ba65122 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..52beb4a3046949c753e0c18ab7a26ab1a2759e8a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7529643a4d864598de49826ceb157ce9075a7361c1f753d4e09883e6da8c39ed +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f99baa2646b45641e41cf6f20bb8ac3e245d3de0 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3cbca363c7de2839cccc11d274e0d4e24b368d7472b5d2d2062746c01cd33de +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..152789dd54327e6bfa4fc820a0bb35567d8ca597 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d8860ebfb197407ec768ac07edafa6fbe62350e351b9a6859375109af5d46e +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..213587e5d6220fff5573be8837fd2529325ea787 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b298319a704222bb170c1977523cd9aeb371872f676956fe5047c57fa019aa38 +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9e0eb4554f898474e8d85cdf1329c4ead2e2d44 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2141c1ede05cf6baf7bda959a8dc8dc015529b9ce5475efcd12eec0ee952de +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0c89b1d555f93e66af22a2a6e7018ff4784699c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87e55263184e34cf82d3762556c0fee21cf35afc58bab08c688d06105152f71 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..95dc8801d4bb5301b03d9e2d0b22cc215722b04c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203bc67f9659d5a561c1eb736a757985bbda37084e7bc5222b44852cefe65f43 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c091ec8448104ec15b660995658f17a1e659ab8c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be44f9c57bcc8de987570de0ccfdfc12ddace178b9ce5f5396cf80aaf03188d1 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0379844db5a48fb4b0012e36e950793300c0bff4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:331865231d99b88cf110bdacb756e993887582890cd1e660099892c63c7235b9 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3396d0c7a746ba511bf20806f96eee8ef4e051a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a9dabfe01f64d9eedc1813c16b704063809df49b747a84e43aedc884180f24 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/latest new file mode 100644 index 0000000000000000000000000000000000000000..a9a22a69382a7711ca9e8ab6945c6d2cc8984927 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/latest @@ -0,0 +1 @@ +global_step110 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..584f4a4a43f100f35696d7314a633631af587f25 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_0.pth @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..05b027a867e5e9cebd446293ecff82cfb240cc76 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..af98f0dfe2a5d89fbccf90df58246a0b078c7016 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..715aa4a4ee3915f810fc2bacb2153eb8a0913781 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7bde70899833455b6ee4a99aff9388abc5ffe92 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..90cdeaa2fe438098e9d95ddbc06c765e51af1e78 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..2bd30529614c5be239cd9477af6bef0e313740b6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bed311094effd49cc2c89237c675f56eade157d1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38b1a376e7c81e3c533cf8a69ddf4eefa9d1336c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0767a9fe84680a5a8a76633a443cb301092115c026c1f5f7f1fbdc53dd7f856f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..faa10a7a8fddbbed0f5d373a98b8fdc118a437d4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.40600586, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90", + "epoch": 2.8947368421052633, + "eval_steps": 10, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.683995568480029, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.128143 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.204940567356727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.12890625, + "logits/rejected": 0.035888671875, + "logps/chosen": -377.25, + "logps/rejected": -512.5, + "loss": 1.8707275390625, + "memory(GiB)": 16.45, + "nll_loss": 1.1767578125, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.00313568115234375, + "rewards/margins": 0.01727294921875, + "rewards/rejected": -0.0140533447265625, + "step": 5, + "train_speed(iter/s)": 0.245862 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 13.261017953236665, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07431640475988388, + "logits/rejected": 0.19111327826976776, + 
"logps/chosen": -464.0, + "logps/rejected": -506.0, + "loss": 2.1458984375, + "memory(GiB)": 40.42, + "nll_loss": 1.5578124523162842, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.6318359375, + "rewards/margins": 0.3611083924770355, + "rewards/rejected": 0.27019041776657104, + "step": 10, + "train_speed(iter/s)": 0.261302 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.515625, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -362.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.279296875, + "eval_nll_loss": 0.80078125, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.203125, + "eval_rewards/margins": 1.2734375, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3679, + "eval_samples_per_second": 2.924, + "eval_steps_per_second": 0.731, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 7.875469545265156, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.28095704317092896, + "logits/rejected": 0.11831054836511612, + "logps/chosen": -354.0, + "logps/rejected": -488.0, + "loss": 1.00537109375, + "memory(GiB)": 40.42, + "nll_loss": 0.734375, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.2828125953674316, + "rewards/margins": 2.1429686546325684, + "rewards/rejected": 1.142187476158142, + "step": 15, + "train_speed(iter/s)": 0.270976 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.3424016920629582, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8218749761581421, + "logits/rejected": 0.4154296815395355, + "logps/chosen": -259.0, + "logps/rejected": -378.3999938964844, + "loss": 1.1302978515625, + "memory(GiB)": 40.42, + "nll_loss": 0.93359375, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.75, + "rewards/margins": 3.28125, + "rewards/rejected": 3.465625047683716, + "step": 20, + "train_speed(iter/s)": 0.278471 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 
1.1015625, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.66650390625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.875, + "eval_rewards/rejected": 4.8125, + "eval_runtime": 1.3337, + "eval_samples_per_second": 2.999, + "eval_steps_per_second": 0.75, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.8405582693629725, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.37519532442092896, + "logits/rejected": 0.10800781100988388, + "logps/chosen": -373.0, + "logps/rejected": -422.6000061035156, + "loss": 0.659814453125, + "memory(GiB)": 40.42, + "nll_loss": 0.599609375, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.087499618530273, + "rewards/margins": 4.443749904632568, + "rewards/rejected": 4.640625, + "step": 25, + "train_speed(iter/s)": 0.276451 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9876609542956052, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5821288824081421, + "logits/rejected": 0.47343748807907104, + "logps/chosen": -250.1999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.5025634765625, + "memory(GiB)": 40.42, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.962499618530273, + "rewards/margins": 6.112500190734863, + "rewards/rejected": 4.853125095367432, + "step": 30, + "train_speed(iter/s)": 0.279399 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.09375, + "eval_logits/rejected": 1.21875, + "eval_logps/chosen": -270.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49560546875, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.4375, + "eval_rewards/margins": 7.65625, + "eval_rewards/rejected": 3.75, + "eval_runtime": 1.3068, + "eval_samples_per_second": 3.061, + "eval_steps_per_second": 0.765, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + 
"grad_norm": 1.1648359643743196, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.4947265684604645, + "logits/rejected": 0.33867186307907104, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.49095458984375, + "memory(GiB)": 40.42, + "nll_loss": 0.48828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.987500190734863, + "rewards/margins": 8.4375, + "rewards/rejected": 3.549999952316284, + "step": 35, + "train_speed(iter/s)": 0.276953 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0600846626477478, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.2225341796875, + "logits/rejected": 0.572460949420929, + "logps/chosen": -272.20001220703125, + "logps/rejected": -484.0, + "loss": 0.457373046875, + "memory(GiB)": 40.42, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.912500381469727, + "rewards/margins": 9.862500190734863, + "rewards/rejected": 2.0640625953674316, + "step": 40, + "train_speed(iter/s)": 0.279857 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": 1.6953125, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.447021484375, + "eval_nll_loss": 0.447265625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.046875, + "eval_runtime": 1.3537, + "eval_samples_per_second": 2.955, + "eval_steps_per_second": 0.739, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6410222742542526, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.21367187798023224, + "logits/rejected": 0.8939453363418579, + "logps/chosen": -306.6000061035156, + "logps/rejected": -481.20001220703125, + "loss": 0.44505615234375, + "memory(GiB)": 40.42, + "nll_loss": 0.4449218809604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.012499809265137, + "rewards/margins": 12.600000381469727, + 
"rewards/rejected": 1.421875, + "step": 45, + "train_speed(iter/s)": 0.277425 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5485582252831179, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.10859374701976776, + "logits/rejected": 1.1062500476837158, + "logps/chosen": -300.3999938964844, + "logps/rejected": -463.6000061035156, + "loss": 0.4327880859375, + "memory(GiB)": 40.42, + "nll_loss": 0.43242186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.949999809265137, + "rewards/rejected": 1.8603515625, + "step": 50, + "train_speed(iter/s)": 0.280134 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5078125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -243.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.424072265625, + "eval_nll_loss": 0.423828125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.8515625, + "eval_runtime": 1.2934, + "eval_samples_per_second": 3.093, + "eval_steps_per_second": 0.773, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2883090056174341, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1586044281721115, + "logits/rejected": 1.302343726158142, + "logps/chosen": -260.3999938964844, + "logps/rejected": -433.20001220703125, + "loss": 0.40018310546875, + "memory(GiB)": 40.42, + "nll_loss": 0.4000000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.399999618530273, + "rewards/margins": 12.8125, + "rewards/rejected": 1.5867187976837158, + "step": 55, + "train_speed(iter/s)": 0.281604 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.23683344653551686, + "learning_rate": 5e-05, + "logits/chosen": -0.05322265625, + "logits/rejected": 0.7669922113418579, + "logps/chosen": -263.3999938964844, + "logps/rejected": -406.0, + "loss": 0.2842041015625, + "memory(GiB)": 40.42, + "nll_loss": 0.2835937440395355, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 15.362500190734863, + "rewards/margins": 12.899999618530273, + "rewards/rejected": 2.457812547683716, + "step": 60, + "train_speed(iter/s)": 0.284501 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.359375, + "eval_logits/rejected": 1.984375, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.42578125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.97265625, + "eval_runtime": 1.3344, + "eval_samples_per_second": 2.998, + "eval_steps_per_second": 0.749, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6109275312536815, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.49492186307907104, + "logits/rejected": 1.047949194908142, + "logps/chosen": -261.3999938964844, + "logps/rejected": -409.6000061035156, + "loss": 0.33233642578125, + "memory(GiB)": 40.42, + "nll_loss": 0.3326171934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.75, + "rewards/margins": 13.975000381469727, + "rewards/rejected": 1.7880859375, + "step": 65, + "train_speed(iter/s)": 0.2839 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.4456195770344251, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.04169921949505806, + "logits/rejected": 1.0632812976837158, + "logps/chosen": -193.0, + "logps/rejected": -502.0, + "loss": 0.32952423095703126, + "memory(GiB)": 40.42, + "nll_loss": 0.32929688692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.350000381469727, + "rewards/rejected": 2.1187500953674316, + "step": 70, + "train_speed(iter/s)": 0.284411 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.28125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -236.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.412841796875, + "eval_nll_loss": 
0.412109375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.875, + "eval_rewards/margins": 14.125, + "eval_rewards/rejected": 0.7265625, + "eval_runtime": 1.3284, + "eval_samples_per_second": 3.011, + "eval_steps_per_second": 0.753, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.8365310785663989, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.4095703065395355, + "logits/rejected": 1.3039062023162842, + "logps/chosen": -297.6000061035156, + "logps/rejected": -457.20001220703125, + "loss": 0.40308837890625, + "memory(GiB)": 40.42, + "nll_loss": 0.4029296934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.850000381469727, + "rewards/margins": 15.212499618530273, + "rewards/rejected": 0.65625, + "step": 75, + "train_speed(iter/s)": 0.283895 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5507859400228646, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.17363281548023224, + "logits/rejected": 1.2405273914337158, + "logps/chosen": -238.60000610351562, + "logps/rejected": -438.0, + "loss": 0.364111328125, + "memory(GiB)": 40.42, + "nll_loss": 0.3833984434604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.324999809265137, + "rewards/margins": 14.100000381469727, + "rewards/rejected": 1.255468726158142, + "step": 80, + "train_speed(iter/s)": 0.28289 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.25, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.375, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3449, + "eval_samples_per_second": 2.974, + "eval_steps_per_second": 0.744, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5879222859445165, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2831054627895355, 
+ "logits/rejected": 1.407812476158142, + "logps/chosen": -250.1999969482422, + "logps/rejected": -474.0, + "loss": 0.3390655517578125, + "memory(GiB)": 40.42, + "nll_loss": 0.33906251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 15.475000381469727, + "rewards/rejected": 0.739697277545929, + "step": 85, + "train_speed(iter/s)": 0.282808 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.28403515939320456, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35429686307907104, + "logits/rejected": 0.766406238079071, + "logps/chosen": -254.39999389648438, + "logps/rejected": -423.20001220703125, + "loss": 0.3229835510253906, + "memory(GiB)": 40.42, + "nll_loss": 0.32304686307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.274999618530273, + "rewards/margins": 14.462499618530273, + "rewards/rejected": 1.8312499523162842, + "step": 90, + "train_speed(iter/s)": 0.285525 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.2265625, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.406005859375, + "eval_nll_loss": 0.40625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3392, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.2494345039535932, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -0.19296875596046448, + "logits/rejected": 1.056249976158142, + "logps/chosen": -312.3999938964844, + "logps/rejected": -528.7999877929688, + "loss": 0.41544189453125, + "memory(GiB)": 40.42, + "nll_loss": 0.41523438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.424999237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.644238293170929, + "step": 95, + "train_speed(iter/s)": 0.283981 + }, + 
{ + "epoch": 2.6315789473684212, + "grad_norm": 0.45907392711831513, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -0.5240234136581421, + "logits/rejected": 1.3953125476837158, + "logps/chosen": -245.89999389648438, + "logps/rejected": -407.20001220703125, + "loss": 0.351611328125, + "memory(GiB)": 49.67, + "nll_loss": 0.3515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.450000762939453, + "rewards/margins": 15.574999809265137, + "rewards/rejected": 0.8648437261581421, + "step": 100, + "train_speed(iter/s)": 0.285707 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.21875, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.407470703125, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": 0.4765625, + "eval_runtime": 1.3287, + "eval_samples_per_second": 3.01, + "eval_steps_per_second": 0.753, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.8245490820336031, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -0.30781251192092896, + "logits/rejected": 1.142187476158142, + "logps/chosen": -228.60000610351562, + "logps/rejected": -441.20001220703125, + "loss": 0.316748046875, + "memory(GiB)": 49.67, + "nll_loss": 0.31640625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.012500762939453, + "rewards/margins": 13.862500190734863, + "rewards/rejected": 2.128124952316284, + "step": 105, + "train_speed(iter/s)": 0.285138 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.44538296304554453, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -0.34589844942092896, + "logits/rejected": 1.1906249523162842, + "logps/chosen": -212.39999389648438, + "logps/rejected": -445.3999938964844, + "loss": 0.3138641357421875, + "memory(GiB)": 49.67, + "nll_loss": 0.31367188692092896, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 15.850000381469727, + "rewards/margins": 13.862500190734863, + "rewards/rejected": 1.97265625, + "step": 110, + "train_speed(iter/s)": 0.286087 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.21875, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.406005859375, + "eval_nll_loss": 0.40625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.6875, + "eval_rewards/rejected": 0.42578125, + "eval_runtime": 1.3327, + "eval_samples_per_second": 3.001, + "eval_steps_per_second": 0.75, + "step": 110 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 140021774417920.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6eed2c4f454bc0afdc4915e40e9435f5f68cf2a4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777f6ba228776bfdbc6a3dde531a341ee883fdf24c50e69b6fb575f29b1c0e77 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-110/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # a memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + 
+[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ed9b24360267b4070cbf2486dcfb95a799d65be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "up_proj", + "k_proj", + "v_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10ed3dfdf18c4257399d1cfac172248efa3cce33 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26d1277cdc566e362c9cd88148c28aafede85be065117e950a4e27f8d8a6359 +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..eb87e3299ce19de9038ae76677ec973ce81e79a6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d4a06898a60150a8dd6cdea902d916084dae14b8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07881a69d7b822446228b3801b76933c2c2899b9918e23ddf2cde07e735f1998 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1eed09640707010586a475ec807c6ebd42a3c153 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c814e40c17636c204209c169ab74e1124dd6b7062ff579cd4c5aff382ebc1978 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5769b4d82bee529ed90ee1c374d871fa961aac35 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f264828552b1b51c3f50386c20b02ef228d0bf709765d42a1380bb8ac802d48a +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c94c8e7844db8277b80d0b449205c71021274c1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cafffb9650441ea681aaa94f6f88c737499c75cd4e2d11845ee5b48e89f0f46 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f36dd5bc6b74c7f405afedbb23d1373621d045f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b52394ab7e2d2c0a1069d865e32f38081e75cfdbe8c55225e7301b12d782316 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe293933cb636c3f4ce00904f277a942c397a795 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e85c78962cc23eadf90d75a53d8aeaa44ab4c78f4fe4319398213ae77ea73271 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..574b79ed442c4712d8ebdda58ed1cb090d52b216 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6f0029be14be628f1848f546733db722ceca0861f8c298f34e40c9e6121985 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ba971e804610976c405d4e574a2d32bc89bcb2e --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f4311677fb6450d06cc062a278900bdbfc81144a9aac2b252dc94aa633d9e3 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8105dae45351bc9cd8b211c09f37bb5810d95eec --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b7abcceb9145df5a06a246581437a97934a2f8604dbf3c79a9a24c8810082f +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fe29e9f02f4c93f6a4a80ed47ee493663ea2869 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:047538f1cc9f7c5ced220fbb45f0ca9f60490275935b59b4ffa604e31036970b +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73562b6988c82a4a8c844c641c8f3d6f330e77ed --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ca18f7097ff28a90d8330b59dff6517f5dd7fcf4d416ce0ca9e03963aeff92 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..753f6a2f1281abee13b649b05f5562881ad567c0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45b61d8ca6f2fb536ccc41020c68cdc99525321ad7e8f0559c0cd39e64ec647 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5c5678c679c42abe517d73591596fdc55c4ae36c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1544cf6f6b337811d0d66e2ab7c960f567172e6768154013d3ad5bf104780e7 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..645d841d3b4fc8aec42bb33a8e158e33118a9e86 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50d991fd917a72eecd18f87f0933d1de6f0f5d4868c46317960defa5318a7ac1 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc20e3e0e7f464801f035b049d529536f9456b86 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1788ae209de6f083ea5e6d315e60f7ca77e5e12c3bfb1af3c47049d284bc1fe3 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16318a8152fc653ccbb55a440f9c7789db0458bf --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338e769473a5bd5e6a6e71459d5ff3a55f43595da5cc253baed8259c1dcd7cb9 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/latest new file mode 100644 index 0000000000000000000000000000000000000000..aad80f76777fd4d23b0b81026f4601524335cbe1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/latest @@ -0,0 +1 @@ +global_step114 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..80f51268a9828e9592a20d8ae8b2cd4ba4bc362c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_0.pth @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..62e581603d525611f9660b6e859462f72bbc9258 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..18b03e632222a58e33ea4fca874b9c52628cc5e1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..41735b5f7ace13ffa57ebed3e7042f1a48ac17fb --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebbb93c1d99b1645075ea27fc9fae66992a691f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0656f84b77a33c9ceba9df16f36437b55ef71bc7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_6.pth new file mode 100644 index 
0000000000000000000000000000000000000000..32b3a313372ee4a2eeaeed69789f8fb4e2c70ad0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c5a45264129fe1d7c409a6867de1a9751476a8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a86ac614a477eb67963adb2c8c07f37c79ded059 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7a9fd18bda7faa50931342147a7de5605bed0f91f6c70d821e84b7bf8f444f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/trainer_state.json 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a9584f595e69686d09ceaf271306ffc00fc34e5f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/trainer_state.json @@ -0,0 +1,651 @@ +{ + "best_metric": 0.40600586, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.683995568480029, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.128143 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.204940567356727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.12890625, + "logits/rejected": 0.035888671875, + "logps/chosen": -377.25, + "logps/rejected": -512.5, + "loss": 1.8707275390625, + "memory(GiB)": 16.45, + "nll_loss": 1.1767578125, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.00313568115234375, + "rewards/margins": 0.01727294921875, + "rewards/rejected": -0.0140533447265625, + "step": 5, + "train_speed(iter/s)": 0.245862 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 13.261017953236665, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07431640475988388, + "logits/rejected": 0.19111327826976776, + "logps/chosen": -464.0, 
+ "logps/rejected": -506.0, + "loss": 2.1458984375, + "memory(GiB)": 40.42, + "nll_loss": 1.5578124523162842, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.6318359375, + "rewards/margins": 0.3611083924770355, + "rewards/rejected": 0.27019041776657104, + "step": 10, + "train_speed(iter/s)": 0.261302 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.515625, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -362.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.279296875, + "eval_nll_loss": 0.80078125, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.203125, + "eval_rewards/margins": 1.2734375, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3679, + "eval_samples_per_second": 2.924, + "eval_steps_per_second": 0.731, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 7.875469545265156, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.28095704317092896, + "logits/rejected": 0.11831054836511612, + "logps/chosen": -354.0, + "logps/rejected": -488.0, + "loss": 1.00537109375, + "memory(GiB)": 40.42, + "nll_loss": 0.734375, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.2828125953674316, + "rewards/margins": 2.1429686546325684, + "rewards/rejected": 1.142187476158142, + "step": 15, + "train_speed(iter/s)": 0.270976 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.3424016920629582, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8218749761581421, + "logits/rejected": 0.4154296815395355, + "logps/chosen": -259.0, + "logps/rejected": -378.3999938964844, + "loss": 1.1302978515625, + "memory(GiB)": 40.42, + "nll_loss": 0.93359375, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.75, + "rewards/margins": 3.28125, + "rewards/rejected": 3.465625047683716, + "step": 20, + "train_speed(iter/s)": 0.278471 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.1015625, + 
"eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.66650390625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.875, + "eval_rewards/rejected": 4.8125, + "eval_runtime": 1.3337, + "eval_samples_per_second": 2.999, + "eval_steps_per_second": 0.75, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.8405582693629725, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.37519532442092896, + "logits/rejected": 0.10800781100988388, + "logps/chosen": -373.0, + "logps/rejected": -422.6000061035156, + "loss": 0.659814453125, + "memory(GiB)": 40.42, + "nll_loss": 0.599609375, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.087499618530273, + "rewards/margins": 4.443749904632568, + "rewards/rejected": 4.640625, + "step": 25, + "train_speed(iter/s)": 0.276451 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9876609542956052, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5821288824081421, + "logits/rejected": 0.47343748807907104, + "logps/chosen": -250.1999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.5025634765625, + "memory(GiB)": 40.42, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.962499618530273, + "rewards/margins": 6.112500190734863, + "rewards/rejected": 4.853125095367432, + "step": 30, + "train_speed(iter/s)": 0.279399 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.09375, + "eval_logits/rejected": 1.21875, + "eval_logps/chosen": -270.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49560546875, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.4375, + "eval_rewards/margins": 7.65625, + "eval_rewards/rejected": 3.75, + "eval_runtime": 1.3068, + "eval_samples_per_second": 3.061, + "eval_steps_per_second": 0.765, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 
1.1648359643743196, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.4947265684604645, + "logits/rejected": 0.33867186307907104, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.49095458984375, + "memory(GiB)": 40.42, + "nll_loss": 0.48828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.987500190734863, + "rewards/margins": 8.4375, + "rewards/rejected": 3.549999952316284, + "step": 35, + "train_speed(iter/s)": 0.276953 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0600846626477478, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.2225341796875, + "logits/rejected": 0.572460949420929, + "logps/chosen": -272.20001220703125, + "logps/rejected": -484.0, + "loss": 0.457373046875, + "memory(GiB)": 40.42, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.912500381469727, + "rewards/margins": 9.862500190734863, + "rewards/rejected": 2.0640625953674316, + "step": 40, + "train_speed(iter/s)": 0.279857 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": 1.6953125, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.447021484375, + "eval_nll_loss": 0.447265625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.046875, + "eval_runtime": 1.3537, + "eval_samples_per_second": 2.955, + "eval_steps_per_second": 0.739, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6410222742542526, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.21367187798023224, + "logits/rejected": 0.8939453363418579, + "logps/chosen": -306.6000061035156, + "logps/rejected": -481.20001220703125, + "loss": 0.44505615234375, + "memory(GiB)": 40.42, + "nll_loss": 0.4449218809604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.012499809265137, + "rewards/margins": 12.600000381469727, + "rewards/rejected": 
1.421875, + "step": 45, + "train_speed(iter/s)": 0.277425 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5485582252831179, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.10859374701976776, + "logits/rejected": 1.1062500476837158, + "logps/chosen": -300.3999938964844, + "logps/rejected": -463.6000061035156, + "loss": 0.4327880859375, + "memory(GiB)": 40.42, + "nll_loss": 0.43242186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.949999809265137, + "rewards/rejected": 1.8603515625, + "step": 50, + "train_speed(iter/s)": 0.280134 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5078125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -243.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.424072265625, + "eval_nll_loss": 0.423828125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.8515625, + "eval_runtime": 1.2934, + "eval_samples_per_second": 3.093, + "eval_steps_per_second": 0.773, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2883090056174341, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1586044281721115, + "logits/rejected": 1.302343726158142, + "logps/chosen": -260.3999938964844, + "logps/rejected": -433.20001220703125, + "loss": 0.40018310546875, + "memory(GiB)": 40.42, + "nll_loss": 0.4000000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.399999618530273, + "rewards/margins": 12.8125, + "rewards/rejected": 1.5867187976837158, + "step": 55, + "train_speed(iter/s)": 0.281604 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.23683344653551686, + "learning_rate": 5e-05, + "logits/chosen": -0.05322265625, + "logits/rejected": 0.7669922113418579, + "logps/chosen": -263.3999938964844, + "logps/rejected": -406.0, + "loss": 0.2842041015625, + "memory(GiB)": 40.42, + "nll_loss": 0.2835937440395355, + "rewards/accuracies": 
1.0, + "rewards/chosen": 15.362500190734863, + "rewards/margins": 12.899999618530273, + "rewards/rejected": 2.457812547683716, + "step": 60, + "train_speed(iter/s)": 0.284501 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.359375, + "eval_logits/rejected": 1.984375, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.42578125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.97265625, + "eval_runtime": 1.3344, + "eval_samples_per_second": 2.998, + "eval_steps_per_second": 0.749, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6109275312536815, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.49492186307907104, + "logits/rejected": 1.047949194908142, + "logps/chosen": -261.3999938964844, + "logps/rejected": -409.6000061035156, + "loss": 0.33233642578125, + "memory(GiB)": 40.42, + "nll_loss": 0.3326171934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.75, + "rewards/margins": 13.975000381469727, + "rewards/rejected": 1.7880859375, + "step": 65, + "train_speed(iter/s)": 0.2839 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.4456195770344251, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.04169921949505806, + "logits/rejected": 1.0632812976837158, + "logps/chosen": -193.0, + "logps/rejected": -502.0, + "loss": 0.32952423095703126, + "memory(GiB)": 40.42, + "nll_loss": 0.32929688692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.350000381469727, + "rewards/rejected": 2.1187500953674316, + "step": 70, + "train_speed(iter/s)": 0.284411 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.28125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -236.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.412841796875, + "eval_nll_loss": 0.412109375, + 
"eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.875, + "eval_rewards/margins": 14.125, + "eval_rewards/rejected": 0.7265625, + "eval_runtime": 1.3284, + "eval_samples_per_second": 3.011, + "eval_steps_per_second": 0.753, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.8365310785663989, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.4095703065395355, + "logits/rejected": 1.3039062023162842, + "logps/chosen": -297.6000061035156, + "logps/rejected": -457.20001220703125, + "loss": 0.40308837890625, + "memory(GiB)": 40.42, + "nll_loss": 0.4029296934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.850000381469727, + "rewards/margins": 15.212499618530273, + "rewards/rejected": 0.65625, + "step": 75, + "train_speed(iter/s)": 0.283895 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5507859400228646, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.17363281548023224, + "logits/rejected": 1.2405273914337158, + "logps/chosen": -238.60000610351562, + "logps/rejected": -438.0, + "loss": 0.364111328125, + "memory(GiB)": 40.42, + "nll_loss": 0.3833984434604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.324999809265137, + "rewards/margins": 14.100000381469727, + "rewards/rejected": 1.255468726158142, + "step": 80, + "train_speed(iter/s)": 0.28289 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.25, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.375, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3449, + "eval_samples_per_second": 2.974, + "eval_steps_per_second": 0.744, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5879222859445165, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2831054627895355, + 
"logits/rejected": 1.407812476158142, + "logps/chosen": -250.1999969482422, + "logps/rejected": -474.0, + "loss": 0.3390655517578125, + "memory(GiB)": 40.42, + "nll_loss": 0.33906251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.225000381469727, + "rewards/margins": 15.475000381469727, + "rewards/rejected": 0.739697277545929, + "step": 85, + "train_speed(iter/s)": 0.282808 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.28403515939320456, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35429686307907104, + "logits/rejected": 0.766406238079071, + "logps/chosen": -254.39999389648438, + "logps/rejected": -423.20001220703125, + "loss": 0.3229835510253906, + "memory(GiB)": 40.42, + "nll_loss": 0.32304686307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.274999618530273, + "rewards/margins": 14.462499618530273, + "rewards/rejected": 1.8312499523162842, + "step": 90, + "train_speed(iter/s)": 0.285525 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.2265625, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.406005859375, + "eval_nll_loss": 0.40625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3392, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.2494345039535932, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -0.19296875596046448, + "logits/rejected": 1.056249976158142, + "logps/chosen": -312.3999938964844, + "logps/rejected": -528.7999877929688, + "loss": 0.41544189453125, + "memory(GiB)": 40.42, + "nll_loss": 0.41523438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.424999237060547, + "rewards/margins": 16.762500762939453, + "rewards/rejected": 0.644238293170929, + "step": 95, + "train_speed(iter/s)": 0.283981 + }, + { 
+ "epoch": 2.6315789473684212, + "grad_norm": 0.45907392711831513, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -0.5240234136581421, + "logits/rejected": 1.3953125476837158, + "logps/chosen": -245.89999389648438, + "logps/rejected": -407.20001220703125, + "loss": 0.351611328125, + "memory(GiB)": 49.67, + "nll_loss": 0.3515625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.450000762939453, + "rewards/margins": 15.574999809265137, + "rewards/rejected": 0.8648437261581421, + "step": 100, + "train_speed(iter/s)": 0.285707 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.21875, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.407470703125, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": 0.4765625, + "eval_runtime": 1.3287, + "eval_samples_per_second": 3.01, + "eval_steps_per_second": 0.753, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.8245490820336031, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -0.30781251192092896, + "logits/rejected": 1.142187476158142, + "logps/chosen": -228.60000610351562, + "logps/rejected": -441.20001220703125, + "loss": 0.316748046875, + "memory(GiB)": 49.67, + "nll_loss": 0.31640625, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.012500762939453, + "rewards/margins": 13.862500190734863, + "rewards/rejected": 2.128124952316284, + "step": 105, + "train_speed(iter/s)": 0.285138 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.44538296304554453, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -0.34589844942092896, + "logits/rejected": 1.1906249523162842, + "logps/chosen": -212.39999389648438, + "logps/rejected": -445.3999938964844, + "loss": 0.3138641357421875, + "memory(GiB)": 49.67, + "nll_loss": 0.31367188692092896, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 15.850000381469727, + "rewards/margins": 13.862500190734863, + "rewards/rejected": 1.97265625, + "step": 110, + "train_speed(iter/s)": 0.286087 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.21875, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.406005859375, + "eval_nll_loss": 0.40625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.6875, + "eval_rewards/rejected": 0.42578125, + "eval_runtime": 1.3327, + "eval_samples_per_second": 3.001, + "eval_steps_per_second": 0.75, + "step": 110 + }, + { + "epoch": 3.0, + "eval_logits/chosen": -1.21875, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.407470703125, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.2973, + "eval_samples_per_second": 3.083, + "eval_steps_per_second": 0.771, + "step": 114 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 145005522976768.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6eed2c4f454bc0afdc4915e40e9435f5f68cf2a4 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777f6ba228776bfdbc6a3dde531a341ee883fdf24c50e69b6fb575f29b1c0e77 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + 
+[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ed9b24360267b4070cbf2486dcfb95a799d65be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "up_proj", + "k_proj", + "v_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bdcce565426678ded34eb38f6058f804969b2cc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4df8dd079de8ac54420224d698f631537d946eefe83acc9e373931d47958d4c +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..eb87e3299ce19de9038ae76677ec973ce81e79a6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..209660d166ea945fcf83fb867202f092552bc3ab --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d35979da910dd7be52926ad47746b3c9c6362949d03c7e821a320bbb8e1de5e +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3879bbd705428352dc01e42171424d12d2dc63f6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cdedbd1f62e402930a9bcce727aa7c2145468db6f7695d872ccdcc4093eb5e4 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c39e48f5ac8a1ed90b5f900cdbf1b7340055e0a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b1b5dc1d34e65adbd0816fbabb9e79d1236f8db7a43b76ee7d6a90504c9a4e54 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ad3b41e27421d3c75e73c6d63bdad37b0617859 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a8d12a2f9e9eadbd707a283a8f5b2bc7d94d61809270b7f0b081767a02d889 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6066510f6f8d69f69658106ee19b25d23db2757e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b733c89c53b61942fb96a4845a9d6538b7269018c68f47ad2c2878b696e900 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42e1e08aefe51507aa46419da90012f39f03c303 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb3fe4ad2c37bbc47813a70ceeed8078ca19886b1352564e879c5bef7fbb47c +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7fd22ef75bd20dd4a39cca1425ccc22e98ad93d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bd3b3c92047f5f1ee66cf8a4cc598c8e48e1923f01e464c9fbd8f1a6f443ac +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe18481848dbc513f0e177e6b508172aff5ec25a --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:656651c70375ab1b2c0481939457b661ea20393b994df561b176ae16e8c63cd6 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..907119852928ed09d6e797d0c8b6b6afbcc39c47 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41be4a3da2e1d9ada88a564e57e68a179d0f4d3015921aa43b816bb2db11d812 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b03895f022afa69ae39102dc977acb8a69da371b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:903a995694e42d740831ac0f804c917f6d94d6256abc61366d53140608274561 +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1f2797a5d6a72fc2ee526fad3243809fc1b0e93 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d612f5cc1f04060a662b881b69eef2573f37a9f123991fb2e4ec8fbf75090f7 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e09da547b800b7ee1fdaf3d0cdba50cd76f720eb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a36f568d392a165e7cdfd23a674624aff885b7e542c817dc85f17338f3d031f +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4c21c94cd3a45200995942b6f7205f6116c015b2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1975d338ad77bacca2045078cb8c938feef232709154e081c7ec29ef6c38ed9 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53218daad5d247dd2ba46b15cb2c014627ad35cb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b998f129ff41d54973b5648e13a69503899063871132189954c90d31c6f14900 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..46f4c0dd4b1b825f665721be91ae4d1980f46212 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07456b371d001f8e5eeeffeafa980aec54e12c55c9485335fd694f01abae179 
+size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f1bba5eeb8a975ae06eef0bdfce3b1800293178 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e62e8db390fea28ab5c6c90f21d525c6ee93880a05ab99e7a37800e65ef5a34 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/latest new file mode 100644 index 0000000000000000000000000000000000000000..75eab498d0366633484ab40334e4b8fb92b16dad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/latest @@ -0,0 +1 @@ +global_step80 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b8b4067e4559b34f9b554c4963fe80d7f5fe839 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ba4c26c615bd5830d41566fab54dc69174be292761b34514b27fbe82b45b630b +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0265e51b5761ac9b323aa87ba00ba14b97e202 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c761d7f9b90c29c2d348a1133fd39be52c65e6bee4c2d179f6a6e564eb3a40 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5dd5aabcd6e7332f14a4796d6ec6c758e10aea0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccab847cc956e055fd3f9dcce06898826d065211e945b83576c8d487f87c5469 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcbdea3a573c2b7717f23e2ea0e4a6da6670d65d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_3.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:0e5f1dbdcf6ec820c22fd1e4258fcd7af2a2bce65c480988d3f111aa574c9c06 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cdcb8d1710063a6c30dec635b4c44e3cb6cd24e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a23184c3e806d2649776427d1da2c0c9137f9b23a84468f3bdd5bbc75f696c9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e39323a662c284cd109b5ce8c39e8a0ce375f2c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382fc01b809542bf6f5e26742e3e19e80a1f189ac5de24cf8cd822e303916b83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ce685d2e57181f70debfb25eb90cb76ceaf47da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_6.pth 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b178265c7d2ae07bff10b7312e5e49b9f5b4914c38969d2f64a6ca006296bca +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e363b8083cdd817e0b3a2e6fd1b65a905e189b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668825a859126c4cf32afb883895c91004130b6aee02178736ca2840e5429ad0 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaf96d6803aea265d756d902db3c4cc2386f9742 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90524bcdb94734ac7120e4205110f14662bff8cee00eed50355875dcdc538029 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..981bd7088cfdd870bae56e91d09aef8164d3587a --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/trainer_state.json @@ -0,0 +1,475 @@ +{ + "best_metric": 0.40795898, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80", + "epoch": 2.1052631578947367, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.683995568480029, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.128143 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.204940567356727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.12890625, + "logits/rejected": 0.035888671875, + "logps/chosen": -377.25, + "logps/rejected": -512.5, + "loss": 1.8707275390625, + "memory(GiB)": 16.45, + "nll_loss": 1.1767578125, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.00313568115234375, + "rewards/margins": 0.01727294921875, + "rewards/rejected": -0.0140533447265625, + "step": 5, + "train_speed(iter/s)": 0.245862 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 13.261017953236665, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07431640475988388, + "logits/rejected": 0.19111327826976776, + "logps/chosen": -464.0, + "logps/rejected": -506.0, + "loss": 2.1458984375, + "memory(GiB)": 40.42, + "nll_loss": 1.5578124523162842, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.6318359375, + "rewards/margins": 0.3611083924770355, + 
"rewards/rejected": 0.27019041776657104, + "step": 10, + "train_speed(iter/s)": 0.261302 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.515625, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -362.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.279296875, + "eval_nll_loss": 0.80078125, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.203125, + "eval_rewards/margins": 1.2734375, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3679, + "eval_samples_per_second": 2.924, + "eval_steps_per_second": 0.731, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 7.875469545265156, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.28095704317092896, + "logits/rejected": 0.11831054836511612, + "logps/chosen": -354.0, + "logps/rejected": -488.0, + "loss": 1.00537109375, + "memory(GiB)": 40.42, + "nll_loss": 0.734375, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.2828125953674316, + "rewards/margins": 2.1429686546325684, + "rewards/rejected": 1.142187476158142, + "step": 15, + "train_speed(iter/s)": 0.270976 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.3424016920629582, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8218749761581421, + "logits/rejected": 0.4154296815395355, + "logps/chosen": -259.0, + "logps/rejected": -378.3999938964844, + "loss": 1.1302978515625, + "memory(GiB)": 40.42, + "nll_loss": 0.93359375, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.75, + "rewards/margins": 3.28125, + "rewards/rejected": 3.465625047683716, + "step": 20, + "train_speed(iter/s)": 0.278471 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.1015625, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.66650390625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.875, + 
"eval_rewards/rejected": 4.8125, + "eval_runtime": 1.3337, + "eval_samples_per_second": 2.999, + "eval_steps_per_second": 0.75, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.8405582693629725, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.37519532442092896, + "logits/rejected": 0.10800781100988388, + "logps/chosen": -373.0, + "logps/rejected": -422.6000061035156, + "loss": 0.659814453125, + "memory(GiB)": 40.42, + "nll_loss": 0.599609375, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.087499618530273, + "rewards/margins": 4.443749904632568, + "rewards/rejected": 4.640625, + "step": 25, + "train_speed(iter/s)": 0.276451 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9876609542956052, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5821288824081421, + "logits/rejected": 0.47343748807907104, + "logps/chosen": -250.1999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.5025634765625, + "memory(GiB)": 40.42, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.962499618530273, + "rewards/margins": 6.112500190734863, + "rewards/rejected": 4.853125095367432, + "step": 30, + "train_speed(iter/s)": 0.279399 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.09375, + "eval_logits/rejected": 1.21875, + "eval_logps/chosen": -270.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49560546875, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.4375, + "eval_rewards/margins": 7.65625, + "eval_rewards/rejected": 3.75, + "eval_runtime": 1.3068, + "eval_samples_per_second": 3.061, + "eval_steps_per_second": 0.765, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 1.1648359643743196, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.4947265684604645, + "logits/rejected": 0.33867186307907104, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 
0.49095458984375, + "memory(GiB)": 40.42, + "nll_loss": 0.48828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.987500190734863, + "rewards/margins": 8.4375, + "rewards/rejected": 3.549999952316284, + "step": 35, + "train_speed(iter/s)": 0.276953 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0600846626477478, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.2225341796875, + "logits/rejected": 0.572460949420929, + "logps/chosen": -272.20001220703125, + "logps/rejected": -484.0, + "loss": 0.457373046875, + "memory(GiB)": 40.42, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.912500381469727, + "rewards/margins": 9.862500190734863, + "rewards/rejected": 2.0640625953674316, + "step": 40, + "train_speed(iter/s)": 0.279857 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": 1.6953125, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.447021484375, + "eval_nll_loss": 0.447265625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.046875, + "eval_runtime": 1.3537, + "eval_samples_per_second": 2.955, + "eval_steps_per_second": 0.739, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6410222742542526, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.21367187798023224, + "logits/rejected": 0.8939453363418579, + "logps/chosen": -306.6000061035156, + "logps/rejected": -481.20001220703125, + "loss": 0.44505615234375, + "memory(GiB)": 40.42, + "nll_loss": 0.4449218809604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.012499809265137, + "rewards/margins": 12.600000381469727, + "rewards/rejected": 1.421875, + "step": 45, + "train_speed(iter/s)": 0.277425 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5485582252831179, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.10859374701976776, + 
"logits/rejected": 1.1062500476837158, + "logps/chosen": -300.3999938964844, + "logps/rejected": -463.6000061035156, + "loss": 0.4327880859375, + "memory(GiB)": 40.42, + "nll_loss": 0.43242186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.949999809265137, + "rewards/rejected": 1.8603515625, + "step": 50, + "train_speed(iter/s)": 0.280134 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5078125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -243.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.424072265625, + "eval_nll_loss": 0.423828125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.8515625, + "eval_runtime": 1.2934, + "eval_samples_per_second": 3.093, + "eval_steps_per_second": 0.773, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2883090056174341, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1586044281721115, + "logits/rejected": 1.302343726158142, + "logps/chosen": -260.3999938964844, + "logps/rejected": -433.20001220703125, + "loss": 0.40018310546875, + "memory(GiB)": 40.42, + "nll_loss": 0.4000000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.399999618530273, + "rewards/margins": 12.8125, + "rewards/rejected": 1.5867187976837158, + "step": 55, + "train_speed(iter/s)": 0.281604 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.23683344653551686, + "learning_rate": 5e-05, + "logits/chosen": -0.05322265625, + "logits/rejected": 0.7669922113418579, + "logps/chosen": -263.3999938964844, + "logps/rejected": -406.0, + "loss": 0.2842041015625, + "memory(GiB)": 40.42, + "nll_loss": 0.2835937440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.362500190734863, + "rewards/margins": 12.899999618530273, + "rewards/rejected": 2.457812547683716, + "step": 60, + "train_speed(iter/s)": 0.284501 + }, + { + "epoch": 1.5789473684210527, + 
"eval_logits/chosen": -1.359375, + "eval_logits/rejected": 1.984375, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.42578125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.97265625, + "eval_runtime": 1.3344, + "eval_samples_per_second": 2.998, + "eval_steps_per_second": 0.749, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6109275312536815, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.49492186307907104, + "logits/rejected": 1.047949194908142, + "logps/chosen": -261.3999938964844, + "logps/rejected": -409.6000061035156, + "loss": 0.33233642578125, + "memory(GiB)": 40.42, + "nll_loss": 0.3326171934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.75, + "rewards/margins": 13.975000381469727, + "rewards/rejected": 1.7880859375, + "step": 65, + "train_speed(iter/s)": 0.2839 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.4456195770344251, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.04169921949505806, + "logits/rejected": 1.0632812976837158, + "logps/chosen": -193.0, + "logps/rejected": -502.0, + "loss": 0.32952423095703126, + "memory(GiB)": 40.42, + "nll_loss": 0.32929688692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.350000381469727, + "rewards/rejected": 2.1187500953674316, + "step": 70, + "train_speed(iter/s)": 0.284411 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.28125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -236.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.412841796875, + "eval_nll_loss": 0.412109375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.875, + "eval_rewards/margins": 14.125, + "eval_rewards/rejected": 0.7265625, + "eval_runtime": 1.3284, + "eval_samples_per_second": 3.011, + "eval_steps_per_second": 0.753, + 
"step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.8365310785663989, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.4095703065395355, + "logits/rejected": 1.3039062023162842, + "logps/chosen": -297.6000061035156, + "logps/rejected": -457.20001220703125, + "loss": 0.40308837890625, + "memory(GiB)": 40.42, + "nll_loss": 0.4029296934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.850000381469727, + "rewards/margins": 15.212499618530273, + "rewards/rejected": 0.65625, + "step": 75, + "train_speed(iter/s)": 0.283895 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5507859400228646, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.17363281548023224, + "logits/rejected": 1.2405273914337158, + "logps/chosen": -238.60000610351562, + "logps/rejected": -438.0, + "loss": 0.364111328125, + "memory(GiB)": 40.42, + "nll_loss": 0.3833984434604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.324999809265137, + "rewards/margins": 14.100000381469727, + "rewards/rejected": 1.255468726158142, + "step": 80, + "train_speed(iter/s)": 0.28289 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.25, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.375, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3449, + "eval_samples_per_second": 2.974, + "eval_steps_per_second": 0.744, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 102458195968000.0, + 
"train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6eed2c4f454bc0afdc4915e40e9435f5f68cf2a4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777f6ba228776bfdbc6a3dde531a341ee883fdf24c50e69b6fb575f29b1c0e77 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-80/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . 
output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = 
sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, 
ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + 
+[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ed9b24360267b4070cbf2486dcfb95a799d65be --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": 
"megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "up_proj", + "k_proj", + "v_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f4ede6f06f671da265c02bab4b3ae4bff7fb4b9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd4c12165f35f5d6698e621e4c2b9586f2fdcc81207a20447d22f7c757ce7f9 +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..eb87e3299ce19de9038ae76677ec973ce81e79a6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + 
"dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": 
null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + 
"vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + 
"undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + 
"training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], 
fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, 
push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..73fc66defd8626c3d480cf60b0833c1866f22cbb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137ef291e77f9235db491790b1e08b08c99b15e284ddf970423748906634ab84 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d99e2ae8a781dc35f14f84663fed0bd3166c422 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3826904a5f9921cac40cb7945b1b86f985d017d153d0d32f91abc529153c236 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fecd0deb0f76e3efc3aa5265b28a6a5d54398143 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3502c8f7187d66ed713ce2e7d322389eed10bd7e7ba6bb364b68c97bfcdad934 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b8f56600f7c21779642c68e137d2eef2fa55535 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8214b4772e9f2bd2fcb273bf691b25ee5c82a5e9c9c6370a786dd253687140d6 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0924b4b4e7795d3f291d1e09ddb95cc835e4bb9c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba1cccccfa9b8b238e1dfcf98fdc87e3088bfa16dfbd6834d26e4649e738dcf +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..99f2697133180768d790be8e6324b6e4115fb591 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae97e0d6732bbe72b3a0b4548e508555c3af11035288d8dffc917135041e47b7 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff8b1859d2511eadb17d239c728da3bb66666781 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab425fef6b7d17bb817f41fa448ea4fdd3466d5b6fc3d2850c084cb884e2db2 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..595e8bf5f4d46390399a294ab79e2bb80a8856db --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce434838a76f5b244997cd59a669a626e83500bcd32dad6e1768d772667545a +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2ddf263d0bbfafc59cfd61bc3a0c4d239b5599e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e85838794056895d21f30752b5f4998b07f7ed00d8fb717bbb2bd83c11ce479 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c55b12bc56cc7a0f76877a92b327bd707199455 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22daf696d9a2bc990c029c8b166df96a9c46700fe24aa4e62c183095432e08e +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ad5e7e41edc2354c2a332bba31cf725d2b57c1a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2474de799cac2fe35d742799fa42f663d49ed74097fe7f8fa29a7b04e9aa2d82 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bce9c762fdcb2d85827dfaf7689155e50056c07d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b7b3f07542ff494e76a2ed0743eca31ea67fcc72a502ef0a3e6a02a0a5b69e +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..359c03666c86b7ac4073c12e3901619ecae7edc5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064261d9d5cfc1f02c65e65192c1b359d04adccfe92bc07a09a8460983535e24 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a192d3d1e09d7b51b74e0a14059519773b20b01 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa44ff7d2c4aad6edaad1adc7dbb405171a6faf130109e604add3d527d8bc23 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3782be710ed25614631ae0091cf94385eeed343 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d907e70fe8ed2713e82c93280f9643133b7718a91fcbc78ba939e0843d794f3 
+size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a024f742a8855695732ecd1b220532c301e17af --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9264c6de9771df782e735e6b99c8fc50144392b9221b167a2eccb20cd5534b +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/latest new file mode 100644 index 0000000000000000000000000000000000000000..8e7a337e2cb23bf07023d223dd647df2d25f0fc1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/latest @@ -0,0 +1 @@ +global_step90 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e31a2394e12bf431ae13288c3d90fe4727f07fa7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1db0a0f44aa3ac1d82c3bf8dc2d8968eeba4ce7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..75de18f57a056bd6a5f89df1abd045678f3f919e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd100693bc9f3267d044ce4a16e702502dc03ec --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_3.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5aeeabfe119f1cb0c8c804f1b9a4d3049f478d69 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..91fe0f42382ab06f4d26d753745a914c9e46100e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5830ca6bd04645962b6e56a00a91cd8349ca449c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_6.pth 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..343d1c0475f0dc64100dc67b09195e047f1a7bcf --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a39c9cfeaa2d69cb5a66e83272eee65ddffaed5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b768777109679597db5d1fa24a743962bede33623e22702b13b95eab2d42cb8 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a1343198e941f50bbf722786d366ecbf13511f2c --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/trainer_state.json @@ -0,0 +1,528 @@ +{ + "best_metric": 0.40600586, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90", + "epoch": 2.3684210526315788, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.683995568480029, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.128143 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.204940567356727, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.12890625, + "logits/rejected": 0.035888671875, + "logps/chosen": -377.25, + "logps/rejected": -512.5, + "loss": 1.8707275390625, + "memory(GiB)": 16.45, + "nll_loss": 1.1767578125, + "rewards/accuracies": 0.34375, + "rewards/chosen": 0.00313568115234375, + "rewards/margins": 0.01727294921875, + "rewards/rejected": -0.0140533447265625, + "step": 5, + "train_speed(iter/s)": 0.245862 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 13.261017953236665, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07431640475988388, + "logits/rejected": 0.19111327826976776, + "logps/chosen": -464.0, + "logps/rejected": -506.0, + "loss": 2.1458984375, + "memory(GiB)": 40.42, + "nll_loss": 1.5578124523162842, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.6318359375, + "rewards/margins": 0.3611083924770355, + 
"rewards/rejected": 0.27019041776657104, + "step": 10, + "train_speed(iter/s)": 0.261302 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.515625, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -362.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.279296875, + "eval_nll_loss": 0.80078125, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.203125, + "eval_rewards/margins": 1.2734375, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3679, + "eval_samples_per_second": 2.924, + "eval_steps_per_second": 0.731, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 7.875469545265156, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.28095704317092896, + "logits/rejected": 0.11831054836511612, + "logps/chosen": -354.0, + "logps/rejected": -488.0, + "loss": 1.00537109375, + "memory(GiB)": 40.42, + "nll_loss": 0.734375, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 3.2828125953674316, + "rewards/margins": 2.1429686546325684, + "rewards/rejected": 1.142187476158142, + "step": 15, + "train_speed(iter/s)": 0.270976 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.3424016920629582, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8218749761581421, + "logits/rejected": 0.4154296815395355, + "logps/chosen": -259.0, + "logps/rejected": -378.3999938964844, + "loss": 1.1302978515625, + "memory(GiB)": 40.42, + "nll_loss": 0.93359375, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.75, + "rewards/margins": 3.28125, + "rewards/rejected": 3.465625047683716, + "step": 20, + "train_speed(iter/s)": 0.278471 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.1015625, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.66650390625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.875, + 
"eval_rewards/rejected": 4.8125, + "eval_runtime": 1.3337, + "eval_samples_per_second": 2.999, + "eval_steps_per_second": 0.75, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.8405582693629725, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.37519532442092896, + "logits/rejected": 0.10800781100988388, + "logps/chosen": -373.0, + "logps/rejected": -422.6000061035156, + "loss": 0.659814453125, + "memory(GiB)": 40.42, + "nll_loss": 0.599609375, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.087499618530273, + "rewards/margins": 4.443749904632568, + "rewards/rejected": 4.640625, + "step": 25, + "train_speed(iter/s)": 0.276451 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9876609542956052, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5821288824081421, + "logits/rejected": 0.47343748807907104, + "logps/chosen": -250.1999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.5025634765625, + "memory(GiB)": 40.42, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.962499618530273, + "rewards/margins": 6.112500190734863, + "rewards/rejected": 4.853125095367432, + "step": 30, + "train_speed(iter/s)": 0.279399 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.09375, + "eval_logits/rejected": 1.21875, + "eval_logps/chosen": -270.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49560546875, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.4375, + "eval_rewards/margins": 7.65625, + "eval_rewards/rejected": 3.75, + "eval_runtime": 1.3068, + "eval_samples_per_second": 3.061, + "eval_steps_per_second": 0.765, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 1.1648359643743196, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.4947265684604645, + "logits/rejected": 0.33867186307907104, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 
0.49095458984375, + "memory(GiB)": 40.42, + "nll_loss": 0.48828125, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.987500190734863, + "rewards/margins": 8.4375, + "rewards/rejected": 3.549999952316284, + "step": 35, + "train_speed(iter/s)": 0.276953 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0600846626477478, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.2225341796875, + "logits/rejected": 0.572460949420929, + "logps/chosen": -272.20001220703125, + "logps/rejected": -484.0, + "loss": 0.457373046875, + "memory(GiB)": 40.42, + "nll_loss": 0.561718761920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.912500381469727, + "rewards/margins": 9.862500190734863, + "rewards/rejected": 2.0640625953674316, + "step": 40, + "train_speed(iter/s)": 0.279857 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.765625, + "eval_logits/rejected": 1.6953125, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.447021484375, + "eval_nll_loss": 0.447265625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.046875, + "eval_runtime": 1.3537, + "eval_samples_per_second": 2.955, + "eval_steps_per_second": 0.739, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6410222742542526, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.21367187798023224, + "logits/rejected": 0.8939453363418579, + "logps/chosen": -306.6000061035156, + "logps/rejected": -481.20001220703125, + "loss": 0.44505615234375, + "memory(GiB)": 40.42, + "nll_loss": 0.4449218809604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.012499809265137, + "rewards/margins": 12.600000381469727, + "rewards/rejected": 1.421875, + "step": 45, + "train_speed(iter/s)": 0.277425 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5485582252831179, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.10859374701976776, + 
"logits/rejected": 1.1062500476837158, + "logps/chosen": -300.3999938964844, + "logps/rejected": -463.6000061035156, + "loss": 0.4327880859375, + "memory(GiB)": 40.42, + "nll_loss": 0.43242186307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.949999809265137, + "rewards/rejected": 1.8603515625, + "step": 50, + "train_speed(iter/s)": 0.280134 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5078125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -243.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.424072265625, + "eval_nll_loss": 0.423828125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.1875, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.8515625, + "eval_runtime": 1.2934, + "eval_samples_per_second": 3.093, + "eval_steps_per_second": 0.773, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2883090056174341, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1586044281721115, + "logits/rejected": 1.302343726158142, + "logps/chosen": -260.3999938964844, + "logps/rejected": -433.20001220703125, + "loss": 0.40018310546875, + "memory(GiB)": 40.42, + "nll_loss": 0.4000000059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.399999618530273, + "rewards/margins": 12.8125, + "rewards/rejected": 1.5867187976837158, + "step": 55, + "train_speed(iter/s)": 0.281604 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.23683344653551686, + "learning_rate": 5e-05, + "logits/chosen": -0.05322265625, + "logits/rejected": 0.7669922113418579, + "logps/chosen": -263.3999938964844, + "logps/rejected": -406.0, + "loss": 0.2842041015625, + "memory(GiB)": 40.42, + "nll_loss": 0.2835937440395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.362500190734863, + "rewards/margins": 12.899999618530273, + "rewards/rejected": 2.457812547683716, + "step": 60, + "train_speed(iter/s)": 0.284501 + }, + { + "epoch": 1.5789473684210527, + 
"eval_logits/chosen": -1.359375, + "eval_logits/rejected": 1.984375, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.42578125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.375, + "eval_rewards/rejected": 0.97265625, + "eval_runtime": 1.3344, + "eval_samples_per_second": 2.998, + "eval_steps_per_second": 0.749, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6109275312536815, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.49492186307907104, + "logits/rejected": 1.047949194908142, + "logps/chosen": -261.3999938964844, + "logps/rejected": -409.6000061035156, + "loss": 0.33233642578125, + "memory(GiB)": 40.42, + "nll_loss": 0.3326171934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.75, + "rewards/margins": 13.975000381469727, + "rewards/rejected": 1.7880859375, + "step": 65, + "train_speed(iter/s)": 0.2839 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.4456195770344251, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.04169921949505806, + "logits/rejected": 1.0632812976837158, + "logps/chosen": -193.0, + "logps/rejected": -502.0, + "loss": 0.32952423095703126, + "memory(GiB)": 40.42, + "nll_loss": 0.32929688692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.350000381469727, + "rewards/rejected": 2.1187500953674316, + "step": 70, + "train_speed(iter/s)": 0.284411 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.28125, + "eval_logits/rejected": 1.9921875, + "eval_logps/chosen": -236.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.412841796875, + "eval_nll_loss": 0.412109375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.875, + "eval_rewards/margins": 14.125, + "eval_rewards/rejected": 0.7265625, + "eval_runtime": 1.3284, + "eval_samples_per_second": 3.011, + "eval_steps_per_second": 0.753, + 
"step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.8365310785663989, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.4095703065395355, + "logits/rejected": 1.3039062023162842, + "logps/chosen": -297.6000061035156, + "logps/rejected": -457.20001220703125, + "loss": 0.40308837890625, + "memory(GiB)": 40.42, + "nll_loss": 0.4029296934604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.850000381469727, + "rewards/margins": 15.212499618530273, + "rewards/rejected": 0.65625, + "step": 75, + "train_speed(iter/s)": 0.283895 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5507859400228646, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.17363281548023224, + "logits/rejected": 1.2405273914337158, + "logps/chosen": -238.60000610351562, + "logps/rejected": -438.0, + "loss": 0.364111328125, + "memory(GiB)": 40.42, + "nll_loss": 0.3833984434604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.324999809265137, + "rewards/margins": 14.100000381469727, + "rewards/rejected": 1.255468726158142, + "step": 80, + "train_speed(iter/s)": 0.28289 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.25, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.375, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3449, + "eval_samples_per_second": 2.974, + "eval_steps_per_second": 0.744, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5879222859445165, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2831054627895355, + "logits/rejected": 1.407812476158142, + "logps/chosen": -250.1999969482422, + "logps/rejected": -474.0, + "loss": 0.3390655517578125, + "memory(GiB)": 40.42, + "nll_loss": 0.33906251192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 
16.225000381469727, + "rewards/margins": 15.475000381469727, + "rewards/rejected": 0.739697277545929, + "step": 85, + "train_speed(iter/s)": 0.282808 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.28403515939320456, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35429686307907104, + "logits/rejected": 0.766406238079071, + "logps/chosen": -254.39999389648438, + "logps/rejected": -423.20001220703125, + "loss": 0.3229835510253906, + "memory(GiB)": 40.42, + "nll_loss": 0.32304686307907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.274999618530273, + "rewards/margins": 14.462499618530273, + "rewards/rejected": 1.8312499523162842, + "step": 90, + "train_speed(iter/s)": 0.285525 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.2265625, + "eval_logits/rejected": 2.015625, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.406005859375, + "eval_nll_loss": 0.40625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.57421875, + "eval_runtime": 1.3392, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 114464163627008.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..6eed2c4f454bc0afdc4915e40e9435f5f68cf2a4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777f6ba228776bfdbc6a3dde531a341ee883fdf24c50e69b6fb575f29b1c0e77 +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..fb3cc6127a4bd982012c5d732fc9bde2490a80bf Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..38b7e6d04d7d1a823549ff6c2c5403e2b96d5524 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..40e450df288327a3decefad652f19fe07bf0f7eb Binary files /dev/null 
and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..c0b94dc01917c772c03016718d3bc97ebe502dbb Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..e3905d3e6c0df5243eee88697fd56b400ded8175 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..0da53416bab045ce59c9df3df669dcfc6bb6d52f Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_accuracies.png 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..7cff44a67bd760c5d9b6b42ce804ca85a17528fe Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..f31b3e5082ce98b89b9a133f1b2e4f8c455177de Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..64c88510ef14c15b031969a4ef9f7f3c06a5c80f Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..011ae0d9b835989a6ff55d8bd288b50f4dde25bb Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_runtime.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..5a0fd36e12b1a166f83b9a85b6ec33f69cc1c436 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..f16111e8c02ef7d9b7ec4f247e84a150d4b3a0d6 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..1d191022363bd56a932a665307051a235d676508 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/eval_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_epoch.png 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_epoch.png new file mode 100644 index 0000000000000000000000000000000000000000..c152c9bbb1100d680125e899d3d0da3db98dc221 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_epoch.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_grad_norm.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_grad_norm.png new file mode 100644 index 0000000000000000000000000000000000000000..c7e3183e2b62ff2a67f73a437c9ea79829ddda25 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_grad_norm.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_learning_rate.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..e35e42c173f451f9666ba448872c615528441c9e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_learning_rate.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..7de27e6bb68b7b0ebb1879c3d44e6e49c3210ac9 Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..275c2ba85bf1e467bca52b415c6a846383f5a915 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..27ee2449fc852ff96834e20fce9a7d3c670ee2f6 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..745063b243689c02e120587fb80c36c35bc6c587 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_loss.png 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..f2fed83028b01d9b4938fae9b5d0ef03ea15b6e4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_memory(GiB).png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_memory(GiB).png new file mode 100644 index 0000000000000000000000000000000000000000..830bbe6b47e9272e464b8fd158a35193913a3077 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_memory(GiB).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1cafc76a3a2535af2fd1b4dcd3f940906403df83 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..10eba17d954518152db0c5872a2da2cebd11461c Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..f286197bd2ff4da9f070f41db13d84ce957ea887 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..e33879ecc43c3b8cc2dcb1f100f2c84ad71db3e4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..f7f9b55fc42a7b071941ea98cc0c7d65e7bc13d1 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_total_flos.png 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_total_flos.png new file mode 100644 index 0000000000000000000000000000000000000000..01679f71d7240e2dfff26c8487d16cab218ebda9 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_total_flos.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..fce04a1c60222a80ecdcd1d11c457f995f5d3174 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_runtime.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..9f9bb5f39eec1857b451932e4d2ba0b0c4595b48 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..563b9dc16b6873d561d9fff8af31054a3b1c70fb Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_speed(iter_s).png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_speed(iter_s).png new file mode 100644 index 0000000000000000000000000000000000000000..81d320ac55428ec42567c4674391db292b55ad9a Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_speed(iter_s).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..a8650e304e60e7ed2184b46147c87f795b9caf5d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/images/train_train_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/logging.jsonl b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/logging.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..29d2d6b53b2c30ad71c709768c20bdb91a2fd76d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/logging.jsonl @@ -0,0 +1,37 @@ +{"loss": 1.12939453, "grad_norm": 10.68399557, "learning_rate": 1.667e-05, "memory(GiB)": 6.7, "train_speed(iter/s)": 0.128143, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, 
"rewards/margins": 0.0, "logps/chosen": -444.0, "logps/rejected": -360.0, "logits/chosen": 0.828125, "logits/rejected": -0.28320312, "nll_loss": 0.43945312, "epoch": 0.02631579, "global_step/max_steps": "1/114", "percentage": "0.88%", "elapsed_time": "5s", "remaining_time": "9m 32s"} +{"loss": 1.87072754, "grad_norm": 14.20494057, "learning_rate": 8.333e-05, "memory(GiB)": 16.45, "train_speed(iter/s)": 0.245862, "rewards/chosen": 0.00313568, "rewards/rejected": -0.01405334, "rewards/accuracies": 0.34375, "rewards/margins": 0.01727295, "logps/chosen": -377.25, "logps/rejected": -512.5, "logits/chosen": -0.12890625, "logits/rejected": 0.03588867, "nll_loss": 1.17675781, "epoch": 0.13157895, "global_step/max_steps": "5/114", "percentage": "4.39%", "elapsed_time": "17s", "remaining_time": "6m 23s"} +{"loss": 2.14589844, "grad_norm": 13.26101795, "learning_rate": 9.966e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.261302, "rewards/chosen": 0.63183594, "rewards/rejected": 0.27019042, "rewards/accuracies": 0.60000002, "rewards/margins": 0.36110839, "logps/chosen": -464.0, "logps/rejected": -506.0, "logits/chosen": 0.0743164, "logits/rejected": 0.19111328, "nll_loss": 1.55781245, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "35s", "remaining_time": "6m 9s"} +{"eval_loss": 1.27929688, "eval_runtime": 1.3679, "eval_samples_per_second": 2.924, "eval_steps_per_second": 0.731, "eval_rewards/chosen": 2.203125, "eval_rewards/rejected": 0.92578125, "eval_rewards/accuracies": 0.75, "eval_rewards/margins": 1.2734375, "eval_logps/chosen": -362.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -2.515625, "eval_logits/rejected": 1.03125, "eval_nll_loss": 0.80078125, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "36s", "remaining_time": "6m 23s"} +{"loss": 1.00537109, "grad_norm": 7.87546955, "learning_rate": 9.83e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.270976, 
"rewards/chosen": 3.2828126, "rewards/rejected": 1.14218748, "rewards/accuracies": 0.89999998, "rewards/margins": 2.14296865, "logps/chosen": -354.0, "logps/rejected": -488.0, "logits/chosen": -0.28095704, "logits/rejected": 0.11831055, "nll_loss": 0.734375, "epoch": 0.39473684, "global_step/max_steps": "15/114", "percentage": "13.16%", "elapsed_time": "52s", "remaining_time": "5m 47s"} +{"loss": 1.13029785, "grad_norm": 3.34240169, "learning_rate": 9.591e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.278471, "rewards/chosen": 6.75, "rewards/rejected": 3.46562505, "rewards/accuracies": 0.92500001, "rewards/margins": 3.28125, "logps/chosen": -259.0, "logps/rejected": -378.3999939, "logits/chosen": -0.82187498, "logits/rejected": 0.41542968, "nll_loss": 0.93359375, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "1m 9s", "remaining_time": "5m 24s"} +{"eval_loss": 0.66650391, "eval_runtime": 1.3337, "eval_samples_per_second": 2.999, "eval_steps_per_second": 0.75, "eval_rewards/chosen": 8.6875, "eval_rewards/rejected": 4.8125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 3.875, "eval_logps/chosen": -298.0, "eval_logps/rejected": -458.0, "eval_logits/chosen": -2.5, "eval_logits/rejected": 1.1015625, "eval_nll_loss": 0.5625, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "1m 10s", "remaining_time": "5m 30s"} +{"loss": 0.65981445, "grad_norm": 2.84055827, "learning_rate": 9.256e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.276451, "rewards/chosen": 9.08749962, "rewards/rejected": 4.640625, "rewards/accuracies": 1.0, "rewards/margins": 4.4437499, "logps/chosen": -373.0, "logps/rejected": -422.6000061, "logits/chosen": -0.37519532, "logits/rejected": 0.10800781, "nll_loss": 0.59960938, "epoch": 0.65789474, "global_step/max_steps": "25/114", "percentage": "21.93%", "elapsed_time": "1m 27s", "remaining_time": "5m 12s"} +{"loss": 0.50256348, "grad_norm": 
0.98766095, "learning_rate": 8.83e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.279399, "rewards/chosen": 10.96249962, "rewards/rejected": 4.8531251, "rewards/accuracies": 1.0, "rewards/margins": 6.11250019, "logps/chosen": -250.19999695, "logps/rejected": -412.79998779, "logits/chosen": -0.58212888, "logits/rejected": 0.47343749, "nll_loss": 0.48750001, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "1m 44s", "remaining_time": "4m 52s"} +{"eval_loss": 0.49560547, "eval_runtime": 1.3068, "eval_samples_per_second": 3.061, "eval_steps_per_second": 0.765, "eval_rewards/chosen": 11.4375, "eval_rewards/rejected": 3.75, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 7.65625, "eval_logps/chosen": -270.0, "eval_logps/rejected": -468.0, "eval_logits/chosen": -2.09375, "eval_logits/rejected": 1.21875, "eval_nll_loss": 0.49609375, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "1m 45s", "remaining_time": "4m 56s"} +{"loss": 0.49095459, "grad_norm": 1.16483596, "learning_rate": 8.324e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.276953, "rewards/chosen": 11.98750019, "rewards/rejected": 3.54999995, "rewards/accuracies": 1.0, "rewards/margins": 8.4375, "logps/chosen": -379.20001221, "logps/rejected": -446.0, "logits/chosen": -0.49472657, "logits/rejected": 0.33867186, "nll_loss": 0.48828125, "epoch": 0.92105263, "global_step/max_steps": "35/114", "percentage": "30.70%", "elapsed_time": "2m 3s", "remaining_time": "4m 39s"} +{"loss": 0.45737305, "grad_norm": 1.06008466, "learning_rate": 7.748e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.279857, "rewards/chosen": 11.91250038, "rewards/rejected": 2.0640626, "rewards/accuracies": 1.0, "rewards/margins": 9.86250019, "logps/chosen": -272.20001221, "logps/rejected": -484.0, "logits/chosen": -0.22253418, "logits/rejected": 0.57246095, "nll_loss": 0.56171876, "epoch": 1.05263158, "global_step/max_steps": "40/114", 
"percentage": "35.09%", "elapsed_time": "2m 20s", "remaining_time": "4m 19s"} +{"eval_loss": 0.44702148, "eval_runtime": 1.3537, "eval_samples_per_second": 2.955, "eval_steps_per_second": 0.739, "eval_rewards/chosen": 13.125, "eval_rewards/rejected": 1.046875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 12.125, "eval_logps/chosen": -253.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.765625, "eval_logits/rejected": 1.6953125, "eval_nll_loss": 0.44726562, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "2m 21s", "remaining_time": "4m 21s"} +{"loss": 0.44505615, "grad_norm": 0.64102227, "learning_rate": 7.113e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.277425, "rewards/chosen": 14.01249981, "rewards/rejected": 1.421875, "rewards/accuracies": 1.0, "rewards/margins": 12.60000038, "logps/chosen": -306.6000061, "logps/rejected": -481.20001221, "logits/chosen": -0.21367188, "logits/rejected": 0.89394534, "nll_loss": 0.44492188, "epoch": 1.18421053, "global_step/max_steps": "45/114", "percentage": "39.47%", "elapsed_time": "2m 39s", "remaining_time": "4m 4s"} +{"loss": 0.43278809, "grad_norm": 0.54855823, "learning_rate": 6.434e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.280134, "rewards/chosen": 14.8125, "rewards/rejected": 1.86035156, "rewards/accuracies": 1.0, "rewards/margins": 12.94999981, "logps/chosen": -300.3999939, "logps/rejected": -463.6000061, "logits/chosen": 0.10859375, "logits/rejected": 1.10625005, "nll_loss": 0.43242186, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "2m 55s", "remaining_time": "3m 44s"} +{"eval_loss": 0.42407227, "eval_runtime": 1.2934, "eval_samples_per_second": 3.093, "eval_steps_per_second": 0.773, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.8515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -243.0, "eval_logps/rejected": -498.0, 
"eval_logits/chosen": -1.5078125, "eval_logits/rejected": 1.9921875, "eval_nll_loss": 0.42382812, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "2m 57s", "remaining_time": "3m 46s"} +{"loss": 0.40018311, "grad_norm": 0.28830901, "learning_rate": 5.725e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.281604, "rewards/chosen": 14.39999962, "rewards/rejected": 1.5867188, "rewards/accuracies": 1.0, "rewards/margins": 12.8125, "logps/chosen": -260.3999939, "logps/rejected": -433.20001221, "logits/chosen": -0.15860443, "logits/rejected": 1.30234373, "nll_loss": 0.40000001, "epoch": 1.44736842, "global_step/max_steps": "55/114", "percentage": "48.25%", "elapsed_time": "3m 12s", "remaining_time": "3m 26s"} +{"loss": 0.2842041, "grad_norm": 0.23683345, "learning_rate": 5e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.284501, "rewards/chosen": 15.36250019, "rewards/rejected": 2.45781255, "rewards/accuracies": 1.0, "rewards/margins": 12.89999962, "logps/chosen": -263.3999939, "logps/rejected": -406.0, "logits/chosen": -0.05322266, "logits/rejected": 0.76699221, "nll_loss": 0.28359374, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "3m 28s", "remaining_time": "3m 7s"} +{"eval_loss": 0.42578125, "eval_runtime": 1.3344, "eval_samples_per_second": 2.998, "eval_steps_per_second": 0.749, "eval_rewards/chosen": 14.375, "eval_rewards/rejected": 0.97265625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -241.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.359375, "eval_logits/rejected": 1.984375, "eval_nll_loss": 0.42578125, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "3m 29s", "remaining_time": "3m 8s"} +{"loss": 0.33233643, "grad_norm": 0.61092753, "learning_rate": 4.275e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.2839, "rewards/chosen": 15.75, "rewards/rejected": 
1.78808594, "rewards/accuracies": 1.0, "rewards/margins": 13.97500038, "logps/chosen": -261.3999939, "logps/rejected": -409.6000061, "logits/chosen": -0.49492186, "logits/rejected": 1.04794919, "nll_loss": 0.33261719, "epoch": 1.71052632, "global_step/max_steps": "65/114", "percentage": "57.02%", "elapsed_time": "3m 46s", "remaining_time": "2m 50s"} +{"loss": 0.32952423, "grad_norm": 0.44561958, "learning_rate": 3.566e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.284411, "rewards/chosen": 14.48750019, "rewards/rejected": 2.1187501, "rewards/accuracies": 1.0, "rewards/margins": 12.35000038, "logps/chosen": -193.0, "logps/rejected": -502.0, "logits/chosen": 0.04169922, "logits/rejected": 1.0632813, "nll_loss": 0.32929689, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "4m 3s", "remaining_time": "2m 32s"} +{"eval_loss": 0.4128418, "eval_runtime": 1.3284, "eval_samples_per_second": 3.011, "eval_steps_per_second": 0.753, "eval_rewards/chosen": 14.875, "eval_rewards/rejected": 0.7265625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.125, "eval_logps/chosen": -236.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.28125, "eval_logits/rejected": 1.9921875, "eval_nll_loss": 0.41210938, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "4m 4s", "remaining_time": "2m 33s"} +{"loss": 0.40308838, "grad_norm": 0.83653108, "learning_rate": 2.887e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.283895, "rewards/chosen": 15.85000038, "rewards/rejected": 0.65625, "rewards/accuracies": 1.0, "rewards/margins": 15.21249962, "logps/chosen": -297.6000061, "logps/rejected": -457.20001221, "logits/chosen": -0.40957031, "logits/rejected": 1.3039062, "nll_loss": 0.40292969, "epoch": 1.97368421, "global_step/max_steps": "75/114", "percentage": "65.79%", "elapsed_time": "4m 21s", "remaining_time": "2m 15s"} +{"loss": 0.36411133, "grad_norm": 0.55078594, "learning_rate": 
2.252e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.28289, "rewards/chosen": 15.32499981, "rewards/rejected": 1.25546873, "rewards/accuracies": 1.0, "rewards/margins": 14.10000038, "logps/chosen": -238.6000061, "logps/rejected": -438.0, "logits/chosen": -0.17363282, "logits/rejected": 1.24052739, "nll_loss": 0.38339844, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "4m 40s", "remaining_time": "1m 59s"} +{"eval_loss": 0.40795898, "eval_runtime": 1.3449, "eval_samples_per_second": 2.974, "eval_steps_per_second": 0.744, "eval_rewards/chosen": 14.9375, "eval_rewards/rejected": 0.57421875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.375, "eval_logps/chosen": -235.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.25, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.40820312, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "4m 41s", "remaining_time": "1m 59s"} +{"loss": 0.33906555, "grad_norm": 0.58792229, "learning_rate": 1.676e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.282808, "rewards/chosen": 16.22500038, "rewards/rejected": 0.73969728, "rewards/accuracies": 1.0, "rewards/margins": 15.47500038, "logps/chosen": -250.19999695, "logps/rejected": -474.0, "logits/chosen": -0.28310546, "logits/rejected": 1.40781248, "nll_loss": 0.33906251, "epoch": 2.23684211, "global_step/max_steps": "85/114", "percentage": "74.56%", "elapsed_time": "4m 57s", "remaining_time": "1m 41s"} +{"loss": 0.32298355, "grad_norm": 0.28403516, "learning_rate": 1.17e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.285525, "rewards/chosen": 16.27499962, "rewards/rejected": 1.83124995, "rewards/accuracies": 1.0, "rewards/margins": 14.46249962, "logps/chosen": -254.3999939, "logps/rejected": -423.20001221, "logits/chosen": -0.35429686, "logits/rejected": 0.76640624, "nll_loss": 0.32304686, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": 
"78.95%", "elapsed_time": "5m 12s", "remaining_time": "1m 23s"} +{"eval_loss": 0.40600586, "eval_runtime": 1.3392, "eval_samples_per_second": 2.987, "eval_steps_per_second": 0.747, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.57421875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5, "eval_logps/chosen": -234.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.2265625, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.40625, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "5m 13s", "remaining_time": "1m 23s"} +{"loss": 0.41544189, "grad_norm": 0.2494345, "learning_rate": 7.44e-06, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.283981, "rewards/chosen": 17.42499924, "rewards/rejected": 0.64423829, "rewards/accuracies": 1.0, "rewards/margins": 16.76250076, "logps/chosen": -312.3999939, "logps/rejected": -528.79998779, "logits/chosen": -0.19296876, "logits/rejected": 1.05624998, "nll_loss": 0.41523439, "epoch": 2.5, "global_step/max_steps": "95/114", "percentage": "83.33%", "elapsed_time": "5m 31s", "remaining_time": "1m 6s"} +{"loss": 0.35161133, "grad_norm": 0.45907393, "learning_rate": 4.09e-06, "memory(GiB)": 49.67, "train_speed(iter/s)": 0.285707, "rewards/chosen": 16.45000076, "rewards/rejected": 0.86484373, "rewards/accuracies": 1.0, "rewards/margins": 15.57499981, "logps/chosen": -245.8999939, "logps/rejected": -407.20001221, "logits/chosen": -0.52402341, "logits/rejected": 1.39531255, "nll_loss": 0.3515625, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "5m 47s", "remaining_time": "48s"} +{"eval_loss": 0.4074707, "eval_runtime": 1.3287, "eval_samples_per_second": 3.01, "eval_steps_per_second": 0.753, "eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.4765625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.21875, 
"eval_logits/rejected": 2.015625, "eval_nll_loss": 0.40820312, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "5m 48s", "remaining_time": "48s"} +{"loss": 0.31674805, "grad_norm": 0.82454908, "learning_rate": 1.7e-06, "memory(GiB)": 49.67, "train_speed(iter/s)": 0.285138, "rewards/chosen": 16.01250076, "rewards/rejected": 2.12812495, "rewards/accuracies": 1.0, "rewards/margins": 13.86250019, "logps/chosen": -228.6000061, "logps/rejected": -441.20001221, "logits/chosen": -0.30781251, "logits/rejected": 1.14218748, "nll_loss": 0.31640625, "epoch": 2.76315789, "global_step/max_steps": "105/114", "percentage": "92.11%", "elapsed_time": "6m 5s", "remaining_time": "31s"} +{"loss": 0.31386414, "grad_norm": 0.44538296, "learning_rate": 3.4e-07, "memory(GiB)": 49.67, "train_speed(iter/s)": 0.286087, "rewards/chosen": 15.85000038, "rewards/rejected": 1.97265625, "rewards/accuracies": 1.0, "rewards/margins": 13.86250019, "logps/chosen": -212.3999939, "logps/rejected": -445.3999939, "logits/chosen": -0.34589845, "logits/rejected": 1.19062495, "nll_loss": 0.31367189, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "6m 21s", "remaining_time": "13s"} +{"eval_loss": 0.40600586, "eval_runtime": 1.3327, "eval_samples_per_second": 3.001, "eval_steps_per_second": 0.75, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.42578125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.6875, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.21875, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.40625, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "6m 23s", "remaining_time": "13s"} +{"eval_loss": 0.4074707, "eval_runtime": 1.2973, "eval_samples_per_second": 3.083, "eval_steps_per_second": 0.771, "eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.57421875, "eval_rewards/accuracies": 
1.0, "eval_rewards/margins": 14.5, "eval_logps/chosen": -234.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.21875, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.40820312, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "6m 39s", "remaining_time": "0s"} +{"train_runtime": 400.8253, "train_samples_per_second": 2.23, "train_steps_per_second": 0.284, "total_flos": 145005522976768.0, "train_loss": 0.58423785, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "6m 40s", "remaining_time": "0s"} +{"train_dataset": "1698.815436±897.000106, min=182.000000, max=4081.000000, size=298", "val_dataset": "1637.250000±797.581461, min=755.000000, max=2485.000000, size=4", "model_parameter_info": "PeftModelForCausalLM: 7635.8016M Params (20.1851M Trainable [0.2643%]), 0.0001M Buffers.", "last_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-114", "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/checkpoint-90", "best_metric": 0.40600586, "global_step": 114, "log_history": [{"loss": 1.12939453125, "grad_norm": 10.683995568480029, "learning_rate": 1.6666666666666667e-05, "memory(GiB)": 6.7, "train_speed(iter/s)": 0.128143, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -444.0, "logps/rejected": -360.0, "logits/chosen": 0.828125, "logits/rejected": -0.283203125, "nll_loss": 0.439453125, "epoch": 0.02631578947368421, "step": 1}, {"loss": 1.8707275390625, "grad_norm": 14.204940567356727, "learning_rate": 8.333333333333334e-05, "memory(GiB)": 16.45, "train_speed(iter/s)": 0.245862, "rewards/chosen": 0.00313568115234375, "rewards/rejected": -0.0140533447265625, "rewards/accuracies": 0.34375, "rewards/margins": 
0.01727294921875, "logps/chosen": -377.25, "logps/rejected": -512.5, "logits/chosen": -0.12890625, "logits/rejected": 0.035888671875, "nll_loss": 1.1767578125, "epoch": 0.13157894736842105, "step": 5}, {"loss": 2.1458984375, "grad_norm": 13.261017953236665, "learning_rate": 9.966191788709716e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.261302, "rewards/chosen": 0.6318359375, "rewards/rejected": 0.27019041776657104, "rewards/accuracies": 0.6000000238418579, "rewards/margins": 0.3611083924770355, "logps/chosen": -464.0, "logps/rejected": -506.0, "logits/chosen": 0.07431640475988388, "logits/rejected": 0.19111327826976776, "nll_loss": 1.5578124523162842, "epoch": 0.2631578947368421, "step": 10}, {"eval_loss": 1.279296875, "eval_runtime": 1.3679, "eval_samples_per_second": 2.924, "eval_steps_per_second": 0.731, "eval_rewards/chosen": 2.203125, "eval_rewards/rejected": 0.92578125, "eval_rewards/accuracies": 0.75, "eval_rewards/margins": 1.2734375, "eval_logps/chosen": -362.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -2.515625, "eval_logits/rejected": 1.03125, "eval_nll_loss": 0.80078125, "epoch": 0.2631578947368421, "step": 10}, {"loss": 1.00537109375, "grad_norm": 7.875469545265156, "learning_rate": 9.829629131445342e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.270976, "rewards/chosen": 3.2828125953674316, "rewards/rejected": 1.142187476158142, "rewards/accuracies": 0.8999999761581421, "rewards/margins": 2.1429686546325684, "logps/chosen": -354.0, "logps/rejected": -488.0, "logits/chosen": -0.28095704317092896, "logits/rejected": 0.11831054836511612, "nll_loss": 0.734375, "epoch": 0.39473684210526316, "step": 15}, {"loss": 1.1302978515625, "grad_norm": 3.3424016920629582, "learning_rate": 9.591080534401371e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.278471, "rewards/chosen": 6.75, "rewards/rejected": 3.465625047683716, "rewards/accuracies": 0.925000011920929, "rewards/margins": 3.28125, "logps/chosen": -259.0, "logps/rejected": 
-378.3999938964844, "logits/chosen": -0.8218749761581421, "logits/rejected": 0.4154296815395355, "nll_loss": 0.93359375, "epoch": 0.5263157894736842, "step": 20}, {"eval_loss": 0.66650390625, "eval_runtime": 1.3337, "eval_samples_per_second": 2.999, "eval_steps_per_second": 0.75, "eval_rewards/chosen": 8.6875, "eval_rewards/rejected": 4.8125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 3.875, "eval_logps/chosen": -298.0, "eval_logps/rejected": -458.0, "eval_logits/chosen": -2.5, "eval_logits/rejected": 1.1015625, "eval_nll_loss": 0.5625, "epoch": 0.5263157894736842, "step": 20}, {"loss": 0.659814453125, "grad_norm": 2.8405582693629725, "learning_rate": 9.255583362184999e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.276451, "rewards/chosen": 9.087499618530273, "rewards/rejected": 4.640625, "rewards/accuracies": 1.0, "rewards/margins": 4.443749904632568, "logps/chosen": -373.0, "logps/rejected": -422.6000061035156, "logits/chosen": -0.37519532442092896, "logits/rejected": 0.10800781100988388, "nll_loss": 0.599609375, "epoch": 0.6578947368421053, "step": 25}, {"loss": 0.5025634765625, "grad_norm": 0.9876609542956052, "learning_rate": 8.83022221559489e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.279399, "rewards/chosen": 10.962499618530273, "rewards/rejected": 4.853125095367432, "rewards/accuracies": 1.0, "rewards/margins": 6.112500190734863, "logps/chosen": -250.1999969482422, "logps/rejected": -412.79998779296875, "logits/chosen": -0.5821288824081421, "logits/rejected": 0.47343748807907104, "nll_loss": 0.48750001192092896, "epoch": 0.7894736842105263, "step": 30}, {"eval_loss": 0.49560546875, "eval_runtime": 1.3068, "eval_samples_per_second": 3.061, "eval_steps_per_second": 0.765, "eval_rewards/chosen": 11.4375, "eval_rewards/rejected": 3.75, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 7.65625, "eval_logps/chosen": -270.0, "eval_logps/rejected": -468.0, "eval_logits/chosen": -2.09375, "eval_logits/rejected": 1.21875, 
"eval_nll_loss": 0.49609375, "epoch": 0.7894736842105263, "step": 30}, {"loss": 0.49095458984375, "grad_norm": 1.1648359643743196, "learning_rate": 8.323979328069689e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.276953, "rewards/chosen": 11.987500190734863, "rewards/rejected": 3.549999952316284, "rewards/accuracies": 1.0, "rewards/margins": 8.4375, "logps/chosen": -379.20001220703125, "logps/rejected": -446.0, "logits/chosen": -0.4947265684604645, "logits/rejected": 0.33867186307907104, "nll_loss": 0.48828125, "epoch": 0.9210526315789473, "step": 35}, {"loss": 0.457373046875, "grad_norm": 1.0600846626477478, "learning_rate": 7.74754489035403e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.279857, "rewards/chosen": 11.912500381469727, "rewards/rejected": 2.0640625953674316, "rewards/accuracies": 1.0, "rewards/margins": 9.862500190734863, "logps/chosen": -272.20001220703125, "logps/rejected": -484.0, "logits/chosen": -0.2225341796875, "logits/rejected": 0.572460949420929, "nll_loss": 0.561718761920929, "epoch": 1.0526315789473684, "step": 40}, {"eval_loss": 0.447021484375, "eval_runtime": 1.3537, "eval_samples_per_second": 2.955, "eval_steps_per_second": 0.739, "eval_rewards/chosen": 13.125, "eval_rewards/rejected": 1.046875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 12.125, "eval_logps/chosen": -253.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.765625, "eval_logits/rejected": 1.6953125, "eval_nll_loss": 0.447265625, "epoch": 1.0526315789473684, "step": 40}, {"loss": 0.44505615234375, "grad_norm": 0.6410222742542526, "learning_rate": 7.113091308703498e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.277425, "rewards/chosen": 14.012499809265137, "rewards/rejected": 1.421875, "rewards/accuracies": 1.0, "rewards/margins": 12.600000381469727, "logps/chosen": -306.6000061035156, "logps/rejected": -481.20001220703125, "logits/chosen": -0.21367187798023224, "logits/rejected": 0.8939453363418579, "nll_loss": 0.4449218809604645, 
"epoch": 1.1842105263157894, "step": 45}, {"loss": 0.4327880859375, "grad_norm": 0.5485582252831179, "learning_rate": 6.434016163555452e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.280134, "rewards/chosen": 14.8125, "rewards/rejected": 1.8603515625, "rewards/accuracies": 1.0, "rewards/margins": 12.949999809265137, "logps/chosen": -300.3999938964844, "logps/rejected": -463.6000061035156, "logits/chosen": 0.10859374701976776, "logits/rejected": 1.1062500476837158, "nll_loss": 0.43242186307907104, "epoch": 1.3157894736842106, "step": 50}, {"eval_loss": 0.424072265625, "eval_runtime": 1.2934, "eval_samples_per_second": 3.093, "eval_steps_per_second": 0.773, "eval_rewards/chosen": 14.1875, "eval_rewards/rejected": 0.8515625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -243.0, "eval_logps/rejected": -498.0, "eval_logits/chosen": -1.5078125, "eval_logits/rejected": 1.9921875, "eval_nll_loss": 0.423828125, "epoch": 1.3157894736842106, "step": 50}, {"loss": 0.40018310546875, "grad_norm": 0.2883090056174341, "learning_rate": 5.724659296536233e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.281604, "rewards/chosen": 14.399999618530273, "rewards/rejected": 1.5867187976837158, "rewards/accuracies": 1.0, "rewards/margins": 12.8125, "logps/chosen": -260.3999938964844, "logps/rejected": -433.20001220703125, "logits/chosen": -0.1586044281721115, "logits/rejected": 1.302343726158142, "nll_loss": 0.4000000059604645, "epoch": 1.4473684210526316, "step": 55}, {"loss": 0.2842041015625, "grad_norm": 0.23683344653551686, "learning_rate": 5e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.284501, "rewards/chosen": 15.362500190734863, "rewards/rejected": 2.457812547683716, "rewards/accuracies": 1.0, "rewards/margins": 12.899999618530273, "logps/chosen": -263.3999938964844, "logps/rejected": -406.0, "logits/chosen": -0.05322265625, "logits/rejected": 0.7669922113418579, "nll_loss": 0.2835937440395355, "epoch": 1.5789473684210527, "step": 
60}, {"eval_loss": 0.42578125, "eval_runtime": 1.3344, "eval_samples_per_second": 2.998, "eval_steps_per_second": 0.749, "eval_rewards/chosen": 14.375, "eval_rewards/rejected": 0.97265625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.375, "eval_logps/chosen": -241.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.359375, "eval_logits/rejected": 1.984375, "eval_nll_loss": 0.42578125, "epoch": 1.5789473684210527, "step": 60}, {"loss": 0.33233642578125, "grad_norm": 0.6109275312536815, "learning_rate": 4.275340703463767e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.2839, "rewards/chosen": 15.75, "rewards/rejected": 1.7880859375, "rewards/accuracies": 1.0, "rewards/margins": 13.975000381469727, "logps/chosen": -261.3999938964844, "logps/rejected": -409.6000061035156, "logits/chosen": -0.49492186307907104, "logits/rejected": 1.047949194908142, "nll_loss": 0.3326171934604645, "epoch": 1.7105263157894737, "step": 65}, {"loss": 0.32952423095703126, "grad_norm": 0.4456195770344251, "learning_rate": 3.5659838364445505e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.284411, "rewards/chosen": 14.487500190734863, "rewards/rejected": 2.1187500953674316, "rewards/accuracies": 1.0, "rewards/margins": 12.350000381469727, "logps/chosen": -193.0, "logps/rejected": -502.0, "logits/chosen": 0.04169921949505806, "logits/rejected": 1.0632812976837158, "nll_loss": 0.32929688692092896, "epoch": 1.8421052631578947, "step": 70}, {"eval_loss": 0.412841796875, "eval_runtime": 1.3284, "eval_samples_per_second": 3.011, "eval_steps_per_second": 0.753, "eval_rewards/chosen": 14.875, "eval_rewards/rejected": 0.7265625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.125, "eval_logps/chosen": -236.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.28125, "eval_logits/rejected": 1.9921875, "eval_nll_loss": 0.412109375, "epoch": 1.8421052631578947, "step": 70}, {"loss": 0.40308837890625, "grad_norm": 0.8365310785663989, "learning_rate": 
2.886908691296504e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.283895, "rewards/chosen": 15.850000381469727, "rewards/rejected": 0.65625, "rewards/accuracies": 1.0, "rewards/margins": 15.212499618530273, "logps/chosen": -297.6000061035156, "logps/rejected": -457.20001220703125, "logits/chosen": -0.4095703065395355, "logits/rejected": 1.3039062023162842, "nll_loss": 0.4029296934604645, "epoch": 1.973684210526316, "step": 75}, {"loss": 0.364111328125, "grad_norm": 0.5507859400228646, "learning_rate": 2.25245510964597e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.28289, "rewards/chosen": 15.324999809265137, "rewards/rejected": 1.255468726158142, "rewards/accuracies": 1.0, "rewards/margins": 14.100000381469727, "logps/chosen": -238.60000610351562, "logps/rejected": -438.0, "logits/chosen": -0.17363281548023224, "logits/rejected": 1.2405273914337158, "nll_loss": 0.3833984434604645, "epoch": 2.1052631578947367, "step": 80}, {"eval_loss": 0.407958984375, "eval_runtime": 1.3449, "eval_samples_per_second": 2.974, "eval_steps_per_second": 0.744, "eval_rewards/chosen": 14.9375, "eval_rewards/rejected": 0.57421875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.375, "eval_logps/chosen": -235.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.25, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.408203125, "epoch": 2.1052631578947367, "step": 80}, {"loss": 0.3390655517578125, "grad_norm": 0.5879222859445165, "learning_rate": 1.6760206719303105e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.282808, "rewards/chosen": 16.225000381469727, "rewards/rejected": 0.739697277545929, "rewards/accuracies": 1.0, "rewards/margins": 15.475000381469727, "logps/chosen": -250.1999969482422, "logps/rejected": -474.0, "logits/chosen": -0.2831054627895355, "logits/rejected": 1.407812476158142, "nll_loss": 0.33906251192092896, "epoch": 2.236842105263158, "step": 85}, {"loss": 0.3229835510253906, "grad_norm": 0.28403515939320456, "learning_rate": 
1.1697777844051105e-05, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.285525, "rewards/chosen": 16.274999618530273, "rewards/rejected": 1.8312499523162842, "rewards/accuracies": 1.0, "rewards/margins": 14.462499618530273, "logps/chosen": -254.39999389648438, "logps/rejected": -423.20001220703125, "logits/chosen": -0.35429686307907104, "logits/rejected": 0.766406238079071, "nll_loss": 0.32304686307907104, "epoch": 2.3684210526315788, "step": 90}, {"eval_loss": 0.406005859375, "eval_runtime": 1.3392, "eval_samples_per_second": 2.987, "eval_steps_per_second": 0.747, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.57421875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5, "eval_logps/chosen": -234.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.2265625, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.40625, "epoch": 2.3684210526315788, "step": 90}, {"loss": 0.41544189453125, "grad_norm": 0.2494345039535932, "learning_rate": 7.444166378150013e-06, "memory(GiB)": 40.42, "train_speed(iter/s)": 0.283981, "rewards/chosen": 17.424999237060547, "rewards/rejected": 0.644238293170929, "rewards/accuracies": 1.0, "rewards/margins": 16.762500762939453, "logps/chosen": -312.3999938964844, "logps/rejected": -528.7999877929688, "logits/chosen": -0.19296875596046448, "logits/rejected": 1.056249976158142, "nll_loss": 0.41523438692092896, "epoch": 2.5, "step": 95}, {"loss": 0.351611328125, "grad_norm": 0.45907392711831513, "learning_rate": 4.089194655986306e-06, "memory(GiB)": 49.67, "train_speed(iter/s)": 0.285707, "rewards/chosen": 16.450000762939453, "rewards/rejected": 0.8648437261581421, "rewards/accuracies": 1.0, "rewards/margins": 15.574999809265137, "logps/chosen": -245.89999389648438, "logps/rejected": -407.20001220703125, "logits/chosen": -0.5240234136581421, "logits/rejected": 1.3953125476837158, "nll_loss": 0.3515625, "epoch": 2.6315789473684212, "step": 100}, {"eval_loss": 0.407470703125, "eval_runtime": 1.3287, 
"eval_samples_per_second": 3.01, "eval_steps_per_second": 0.753, "eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.4765625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.21875, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.408203125, "epoch": 2.6315789473684212, "step": 100}, {"loss": 0.316748046875, "grad_norm": 0.8245490820336031, "learning_rate": 1.70370868554659e-06, "memory(GiB)": 49.67, "train_speed(iter/s)": 0.285138, "rewards/chosen": 16.012500762939453, "rewards/rejected": 2.128124952316284, "rewards/accuracies": 1.0, "rewards/margins": 13.862500190734863, "logps/chosen": -228.60000610351562, "logps/rejected": -441.20001220703125, "logits/chosen": -0.30781251192092896, "logits/rejected": 1.142187476158142, "nll_loss": 0.31640625, "epoch": 2.763157894736842, "step": 105}, {"loss": 0.3138641357421875, "grad_norm": 0.44538296304554453, "learning_rate": 3.380821129028489e-07, "memory(GiB)": 49.67, "train_speed(iter/s)": 0.286087, "rewards/chosen": 15.850000381469727, "rewards/rejected": 1.97265625, "rewards/accuracies": 1.0, "rewards/margins": 13.862500190734863, "logps/chosen": -212.39999389648438, "logps/rejected": -445.3999938964844, "logits/chosen": -0.34589844942092896, "logits/rejected": 1.1906249523162842, "nll_loss": 0.31367188692092896, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.406005859375, "eval_runtime": 1.3327, "eval_samples_per_second": 3.001, "eval_steps_per_second": 0.75, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.42578125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.6875, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.21875, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.40625, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.407470703125, "eval_runtime": 1.2973, "eval_samples_per_second": 3.083, "eval_steps_per_second": 0.771, 
"eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.57421875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5, "eval_logps/chosen": -234.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.21875, "eval_logits/rejected": 2.015625, "eval_nll_loss": 0.408203125, "epoch": 3.0, "step": 114}, {"train_runtime": 400.8253, "train_samples_per_second": 2.23, "train_steps_per_second": 0.284, "total_flos": 145005522976768.0, "train_loss": 0.5842378515946237, "epoch": 3.0, "step": 114}], "memory": 49.671875} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs/events.out.tfevents.1739624518.kml-task-540432-record-10144729-prod-worker-0.18148.0 b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs/events.out.tfevents.1739624518.kml-task-540432-record-10144729-prod-worker-0.18148.0 new file mode 100644 index 0000000000000000000000000000000000000000..8d6ac9680efe4780c5690cc291b00b61e626dd23 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_random20/v0-20250215-130040/runs/events.out.tfevents.1739624518.kml-task-540432-record-10144729-prod-worker-0.18148.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdbc9b4387ca8999f6e6d56a4fbf88a1d0fab298bddb0d9f4179e0d955b7993 +size 36895 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/args.json new file mode 100644 index 0000000000000000000000000000000000000000..abf6bbaf2688cf3210ff53d87ff14d0ddce51227 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + 
"task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + "ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + 
"per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": 
null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, + "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + 
"auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": "gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + 
"galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + "global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + 
"model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + "training_args": "DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, 
per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': 
{'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, 
batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model 
type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b60a9ad0460c3988479843eac97361671f44930 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6e4bd9ee164fe7f9f7b5b63671fd8657035ee30 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3031e740f155180547f7b1d161044beb00d0373e25eb2a36c9f21d0255a6bd3e +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..abf6bbaf2688cf3210ff53d87ff14d0ddce51227 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6407687860ec40903a0a0eb1d728ef29a96d334 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3b4afede2467d3299adf29cd56ea3f52dfdc23d7464390bd59c7c4092f4b78 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..640fae427838b1ae06ba4d08be1ae1feca7ea511 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb10ea10447402f2534c6c9e23cc8fa24774ca445f9a01a455be27771c7d7298 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d2897177bf4d302bcb25406bd059927dc9fd51f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc861629ba9251e527ccfe83b7a7dfdf69e72f039cd1f96cbd1de15f53f39f7b +size 30281648 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f81c92d4738e6600236697524773923baa3e7310 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c79f18a444c16c7a79a7575bfe659ca44df97e65a63a684f8b5b92c444c7b79 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..417116f790869eb912a77b6f1cc1e07805858902 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196a25e1fbfa36d12b4a9591b98480ca8c0f304b9faa7ff4924fd739111d1e34 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..72e3aa915a0fd562e1cbb4921095603956b5c45c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73418e1e63328a8ab3a76426f9f4889690f2c6d64c6f3ed9b8321873167598b +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..376ffd92241ea9054d7e9d5f98683c9e5bb08c8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:525f6b9f6eac378af40b2d3215c77789e00b947dd6529f02f7055b5136099267 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..63975cb036d0772d217815999dad23f7d28d6e7c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0e05040987cf49b86ff421695f5931cdf559630a2569da53b71969e8e93ac3fa +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..51a824dd36aeeb461bcf24fdf65384617685ab58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fc52c4a93583d9ad1a3836a7962f19034a9214ea700213400e4e221d387ebf +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3030ad106e287244bdff00e3a480f834957bf98a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2805eac185e17dd4b95e30fc04f67d72aa81a6d73880fe5e9379cef1e5eb6849 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c64c71e0dbfaaf1d7d1180fb635af37c4883ce5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7025a7b98bdc6b48906710b0f90ff63b74c7dcffd11ccf5d3339ab5055d27a +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f307c60d54d063683f5211762ab54a9926dee0ad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9021c4d4ef3a89baee0711f5c7a98e7594780638178c4911da6b76c1b3d6ad58 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f17ea040e9ab6a061988521ac29614f857dbab5 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f593d1f818e92016a3064904de1ca8d3c95a428a1c461d70d2b3707cc0bc08cd +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b5f8c06fd177293874596e9c2ac0303e0a197ce --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1bbf993d78c31b2e89172cc59291164bae77a5a9420476e18675c7cedc0ec6 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f56a955eeaeebf1b6c1cb4ab4150de631f41dc19 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c72726d5c73af4fdcb1da59a45a3e232a4469863c1f548f8634359802bf5015 +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97a4a5ca4846ec856e77100a09835cc006430632 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/global_step100/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f503458dbdd0f650b050f90804c781b0da4033f3af162b716d3b25fa8c0dac7f +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/latest new file mode 100644 index 0000000000000000000000000000000000000000..744ae7dbad571b6f37ec6c7066549494261bb59e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/latest @@ -0,0 +1 @@ +global_step100 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..97f51b498d48145bd9cc14b35f8236b9ec95a4f7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1bec598899f9d59e70c1b4705ce420a1e0a670957b6c8153a589880068ae5a4 +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08e59ac81067b262a084604cd3392250166c2841 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60d2348aae518f4c44693db9c9b4b3a3299c556e7f0a86c188b2e4c3e364a7c +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..20a24c17b4be2ee59cd5e6682010519318a91e58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe5a79d3bcb4ce033de360bc765e616316e3562aba25887cd85c4adbb935abf +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..54050f6cf8fb847e2a926e14a7aad2647761521a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a9d1f6e22677721841890e6a27855857e6840137650d609eb8e4ac13b71d29 +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..263aae475c49b090bce43f143308192c5bf9a95b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac4ff84388a6a4fe3bcae6207c68b2ee5528fb3b6de8cc3588fe1975462aa5 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..942ed5d60ae87dce686b33da76a34db404036dc6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fce3cdf5c1b8a8a291e0c73b384e3ad5252640e21e942b44b26b8b0928ffa9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..57789be3df3983cb8acc1500bf6470ffadb1c578 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919e675f3bcaf4f3c8ba35cd8debf85aec3bbc3c8e5019b74431e0a314e4d37a +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32d6e2e7eb7148713b473b0c821a98e616ab6e6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf6479ce82b88efc6a72a8ee512162b3d0ecab972817296d38ab9c448bb8d96 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a1fb08c48e9d34df783eb19e7c9d1caf0ed386 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec37c3a15b8d061312402391f2fddb52d623a1416d6d2879a30f184450d844f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..31b4e88e78ccd4d272416b052210646288821f19 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/trainer_state.json @@ -0,0 +1,581 @@ +{ + "best_metric": 0.40795898, + "best_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90", + "epoch": 2.6315789473684212, + "eval_steps": 10, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.70593006577457, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.113961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.176485007231813, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.1298828125, + "logits/rejected": 0.03759765625, + "logps/chosen": -377.25, + "logps/rejected": -512.0, + "loss": 1.8751220703125, + "memory(GiB)": 16.45, + "nll_loss": 1.1748046875, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.01715087890625, + "rewards/margins": 0.0078125, + "rewards/rejected": 0.009368896484375, + "step": 5, + "train_speed(iter/s)": 0.234379 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 12.015147423707822, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07973632961511612, + "logits/rejected": 0.19414062798023224, + "logps/chosen": -462.79998779296875, + "logps/rejected": -507.20001220703125, + "loss": 2.12158203125, + "memory(GiB)": 40.52, + "nll_loss": 1.553125023841858, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.702929675579071, + "rewards/margins": 0.4051757752895355, + "rewards/rejected": 0.2975097596645355, + "step": 10, + "train_speed(iter/s)": 0.254866 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.03125, + 
"eval_logps/chosen": -360.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.19140625, + "eval_nll_loss": 0.796875, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.40625, + "eval_rewards/margins": 1.4765625, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3322, + "eval_samples_per_second": 3.002, + "eval_steps_per_second": 0.751, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 9.41426350911575, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.29057615995407104, + "logits/rejected": 0.11259765923023224, + "logps/chosen": -353.6000061035156, + "logps/rejected": -487.6000061035156, + "loss": 1.001171875, + "memory(GiB)": 40.52, + "nll_loss": 0.731249988079071, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.364062547683716, + "rewards/margins": 2.234375, + "rewards/rejected": 1.1257812976837158, + "step": 15, + "train_speed(iter/s)": 0.266928 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.417766982037976, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8324218988418579, + "logits/rejected": 0.4007812440395355, + "logps/chosen": -258.20001220703125, + "logps/rejected": -377.6000061035156, + "loss": 1.1167236328125, + "memory(GiB)": 40.52, + "nll_loss": 0.922656238079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.8125, + "rewards/margins": 3.262500047683716, + "rewards/rejected": 3.543750047683716, + "step": 20, + "train_speed(iter/s)": 0.275171 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.0859375, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.666015625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.9375, + "eval_rewards/rejected": 4.78125, + "eval_runtime": 1.3177, + "eval_samples_per_second": 3.036, + "eval_steps_per_second": 0.759, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + 
"grad_norm": 2.721982515360601, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.3814453184604645, + "logits/rejected": 0.09628906100988388, + "logps/chosen": -373.6000061035156, + "logps/rejected": -422.20001220703125, + "loss": 0.673486328125, + "memory(GiB)": 40.52, + "nll_loss": 0.600781261920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.074999809265137, + "rewards/margins": 4.393750190734863, + "rewards/rejected": 4.684374809265137, + "step": 25, + "train_speed(iter/s)": 0.274036 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9866084312671491, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5782226324081421, + "logits/rejected": 0.4605468809604645, + "logps/chosen": -250.6999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.50615234375, + "memory(GiB)": 40.52, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.949999809265137, + "rewards/margins": 6.068749904632568, + "rewards/rejected": 4.884375095367432, + "step": 30, + "train_speed(iter/s)": 0.277362 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.078125, + "eval_logits/rejected": 1.203125, + "eval_logps/chosen": -272.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49609375, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.3125, + "eval_rewards/margins": 7.5, + "eval_rewards/rejected": 3.78125, + "eval_runtime": 1.3623, + "eval_samples_per_second": 2.936, + "eval_steps_per_second": 0.734, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 1.2141834186198552, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.512499988079071, + "logits/rejected": 0.31171876192092896, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.4916259765625, + "memory(GiB)": 40.52, + "nll_loss": 0.4892578125, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.012499809265137, + "rewards/margins": 
8.487500190734863, + "rewards/rejected": 3.543750047683716, + "step": 35, + "train_speed(iter/s)": 0.275044 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0755142345948945, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.24981689453125, + "logits/rejected": 0.541796863079071, + "logps/chosen": -271.3999938964844, + "logps/rejected": -482.3999938964844, + "loss": 0.45865478515625, + "memory(GiB)": 40.52, + "nll_loss": 0.563281238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.9375, + "rewards/margins": 9.762499809265137, + "rewards/rejected": 2.1859374046325684, + "step": 40, + "train_speed(iter/s)": 0.278234 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7734375, + "eval_logits/rejected": 1.65625, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.4462890625, + "eval_nll_loss": 0.4453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.0, + "eval_runtime": 1.291, + "eval_samples_per_second": 3.098, + "eval_steps_per_second": 0.775, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6599652846962414, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.24863281846046448, + "logits/rejected": 0.8564453125, + "logps/chosen": -306.20001220703125, + "logps/rejected": -480.0, + "loss": 0.44638671875, + "memory(GiB)": 40.52, + "nll_loss": 0.44648438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.050000190734863, + "rewards/margins": 12.537500381469727, + "rewards/rejected": 1.506250023841858, + "step": 45, + "train_speed(iter/s)": 0.276176 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5744853865576506, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.05844726413488388, + "logits/rejected": 1.05859375, + "logps/chosen": -300.20001220703125, + "logps/rejected": -461.6000061035156, + "loss": 0.434783935546875, + "memory(GiB)": 40.52, + 
"nll_loss": 0.4339843690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.762499809265137, + "rewards/rejected": 2.0621094703674316, + "step": 50, + "train_speed(iter/s)": 0.279108 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5546875, + "eval_logits/rejected": 1.9296875, + "eval_logps/chosen": -242.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.425537109375, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 0.9765625, + "eval_runtime": 1.3278, + "eval_samples_per_second": 3.013, + "eval_steps_per_second": 0.753, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2926328226891597, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1898193359375, + "logits/rejected": 1.259374976158142, + "logps/chosen": -261.20001220703125, + "logps/rejected": -431.20001220703125, + "loss": 0.40357666015625, + "memory(GiB)": 40.52, + "nll_loss": 0.40234375, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.324999809265137, + "rewards/margins": 12.612500190734863, + "rewards/rejected": 1.704687476158142, + "step": 55, + "train_speed(iter/s)": 0.280714 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.24140281258606347, + "learning_rate": 5e-05, + "logits/chosen": -0.07255859673023224, + "logits/rejected": 0.758007824420929, + "logps/chosen": -263.3999938964844, + "logps/rejected": -405.20001220703125, + "loss": 0.28531494140625, + "memory(GiB)": 40.52, + "nll_loss": 0.28496092557907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.337499618530273, + "rewards/margins": 12.774999618530273, + "rewards/rejected": 2.5546875, + "step": 60, + "train_speed(iter/s)": 0.283696 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.40625, + "eval_logits/rejected": 1.96875, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -498.0, + "eval_loss": 
0.425048828125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.5625, + "eval_rewards/rejected": 0.80078125, + "eval_runtime": 1.3237, + "eval_samples_per_second": 3.022, + "eval_steps_per_second": 0.755, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6094457836784298, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.5101562738418579, + "logits/rejected": 1.0482909679412842, + "logps/chosen": -261.0, + "logps/rejected": -409.6000061035156, + "loss": 0.3310546875, + "memory(GiB)": 40.52, + "nll_loss": 0.33125001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.774999618530273, + "rewards/margins": 14.050000190734863, + "rewards/rejected": 1.736328125, + "step": 65, + "train_speed(iter/s)": 0.283196 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.45260618201827363, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.03581542894244194, + "logits/rejected": 1.0457031726837158, + "logps/chosen": -193.0, + "logps/rejected": -501.6000061035156, + "loss": 0.32975921630859373, + "memory(GiB)": 40.52, + "nll_loss": 0.3296875059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 2.176562547683716, + "step": 70, + "train_speed(iter/s)": 0.283818 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.3203125, + "eval_logits/rejected": 1.9765625, + "eval_logps/chosen": -237.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.41455078125, + "eval_nll_loss": 0.4140625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.75, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": 0.6796875, + "eval_runtime": 1.3685, + "eval_samples_per_second": 2.923, + "eval_steps_per_second": 0.731, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.7812661467833826, + "learning_rate": 2.886908691296504e-05, + 
"logits/chosen": -0.42255860567092896, + "logits/rejected": 1.28125, + "logps/chosen": -298.0, + "logps/rejected": -456.79998779296875, + "loss": 0.40250244140625, + "memory(GiB)": 40.52, + "nll_loss": 0.4027343690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.800000190734863, + "rewards/margins": 15.125, + "rewards/rejected": 0.692187488079071, + "step": 75, + "train_speed(iter/s)": 0.283316 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5488588034556375, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.18242187798023224, + "logits/rejected": 1.212890625, + "logps/chosen": -239.39999389648438, + "logps/rejected": -437.20001220703125, + "loss": 0.3640625, + "memory(GiB)": 40.52, + "nll_loss": 0.38066405057907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.3125, + "rewards/margins": 13.925000190734863, + "rewards/rejected": 1.3738281726837158, + "step": 80, + "train_speed(iter/s)": 0.282344 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.3046875, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.410400390625, + "eval_nll_loss": 0.41015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.375, + "eval_runtime": 1.3645, + "eval_samples_per_second": 2.931, + "eval_steps_per_second": 0.733, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5977630685878709, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2928222715854645, + "logits/rejected": 1.396875023841858, + "logps/chosen": -250.60000610351562, + "logps/rejected": -473.6000061035156, + "loss": 0.339306640625, + "memory(GiB)": 40.52, + "nll_loss": 0.3388671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.162500381469727, + "rewards/margins": 15.362500190734863, + "rewards/rejected": 0.8084961175918579, + "step": 85, + "train_speed(iter/s)": 0.282252 + }, + { + 
"epoch": 2.3684210526315788, + "grad_norm": 0.2955118166701661, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35551756620407104, + "logits/rejected": 0.749218761920929, + "logps/chosen": -254.39999389648438, + "logps/rejected": -422.0, + "loss": 0.3234832763671875, + "memory(GiB)": 40.52, + "nll_loss": 0.3232421875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.237499237060547, + "rewards/margins": 14.3125, + "rewards/rejected": 1.9640624523162842, + "step": 90, + "train_speed(iter/s)": 0.285022 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.5, + "eval_runtime": 1.3391, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.25565176186037897, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -0.21904297173023224, + "logits/rejected": 1.0421874523162842, + "logps/chosen": -312.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.41697998046875, + "memory(GiB)": 40.52, + "nll_loss": 0.41621094942092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.387500762939453, + "rewards/margins": 16.700000762939453, + "rewards/rejected": 0.702099621295929, + "step": 95, + "train_speed(iter/s)": 0.283485 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.44788320009382443, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -0.5326172113418579, + "logits/rejected": 1.3898437023162842, + "logps/chosen": -245.89999389648438, + "logps/rejected": -406.79998779296875, + "loss": 0.351910400390625, + "memory(GiB)": 49.77, + "nll_loss": 0.35175782442092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.462499618530273, + 
"rewards/margins": 15.600000381469727, + "rewards/rejected": 0.850390613079071, + "step": 100, + "train_speed(iter/s)": 0.28525 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.408935546875, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.6015625, + "eval_runtime": 1.3384, + "eval_samples_per_second": 2.989, + "eval_steps_per_second": 0.747, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 127458152022016.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e926cd924b33a09827843ef082da1793fb49fece --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512ce35c3cb0d098001365fc17b658c77df2485cba0600e08e62a9d5e66bbbec +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-100/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b60a9ad0460c3988479843eac97361671f44930 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac97c4f5da6851f9949be90f3c7ea78f9061d49d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40312fd20e645c776f9508c417a071e212585d439e6b757e71e0c82f00ea5297 +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..abf6bbaf2688cf3210ff53d87ff14d0ddce51227 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fdd2574d13e01e0b17274a56a467f877e6038bb --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5848c714643a7132cc2030ccd7e37d611b951ecc0f4af42638c9af61d755b7e5 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..51b006045c4ed43285f670e41ac8573df8ecd7d9 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18964d0572211545a3cd0f635e2b27483f36b65b630751a4ebbef9880b7db66 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..59a3bb93697dd04ded8bc4d4619c3fa351332d91 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f24d758e7648cf35fa8d9729e373584572adc9e07566f7d40a756bc404c0309a +size 30281648 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b8abcf88b1b523d0136a32bdc100442d7a2e1d7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b9de9ebe50e09d9ca10a64207fc5bdee09bb32e139d2b5e74ecfcc527a13c6 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b4b9209c920176bb7a6e92a02ae93dc3de05f06 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cad2b59e8c1db9381572f5a7d2b4c571c94d101252567a7d3c3ac3314a87da4 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0b58fbbca406a864bef627ab349ea9776604a42b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd27fa1fd93259065b1db31c4628ffc35b3b8753aee1730954760bc3ad85201 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..72fd046f1d1fb925ec2e84192ab7f53a0a5d6ad0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914483f74ecf205f630ba939f2c3af4880dfc22bcac24459b78744e34879a467 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c07c04568eca3816c39f93974d9e1fe77c14e55 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:faef1f1ff802a3f1faa5026d5cfe178ab7863ccecb3d304bd0546bd5e83f353a +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..152789dd54327e6bfa4fc820a0bb35567d8ca597 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d8860ebfb197407ec768ac07edafa6fbe62350e351b9a6859375109af5d46e +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..213587e5d6220fff5573be8837fd2529325ea787 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b298319a704222bb170c1977523cd9aeb371872f676956fe5047c57fa019aa38 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9e0eb4554f898474e8d85cdf1329c4ead2e2d44 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2141c1ede05cf6baf7bda959a8dc8dc015529b9ce5475efcd12eec0ee952de +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0c89b1d555f93e66af22a2a6e7018ff4784699c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87e55263184e34cf82d3762556c0fee21cf35afc58bab08c688d06105152f71 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95dc8801d4bb5301b03d9e2d0b22cc215722b04c --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203bc67f9659d5a561c1eb736a757985bbda37084e7bc5222b44852cefe65f43 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c091ec8448104ec15b660995658f17a1e659ab8c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be44f9c57bcc8de987570de0ccfdfc12ddace178b9ce5f5396cf80aaf03188d1 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0379844db5a48fb4b0012e36e950793300c0bff4 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331865231d99b88cf110bdacb756e993887582890cd1e660099892c63c7235b9 +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3396d0c7a746ba511bf20806f96eee8ef4e051a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/global_step110/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a9dabfe01f64d9eedc1813c16b704063809df49b747a84e43aedc884180f24 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/latest new file mode 100644 index 0000000000000000000000000000000000000000..a9a22a69382a7711ca9e8ab6945c6d2cc8984927 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/latest @@ -0,0 +1 @@ +global_step110 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..584f4a4a43f100f35696d7314a633631af587f25 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7891ffa7c7dae99113aa986d67278b52b8c57db55001dc3547a61f24569a34ee +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..05b027a867e5e9cebd446293ecff82cfb240cc76 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b92875cb04deec367605433847d1bda444b178b643d2da7ed9aaf738d232b4 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..af98f0dfe2a5d89fbccf90df58246a0b078c7016 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f5f3338a05e325b5408a1cd0b6f5e5b10fad05fe479d63f44bec4cf18107d6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..715aa4a4ee3915f810fc2bacb2153eb8a0913781 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be749fea477a3867d44010631937e0d8f071ca5f9614f9795c92c7fa68833a6 +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7bde70899833455b6ee4a99aff9388abc5ffe92 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc4a5ea4532c621f4c8e9891117b2e597a7f005001e8b4f2a1b4da8c82bf964 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..90cdeaa2fe438098e9d95ddbc06c765e51af1e78 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480f9fe7dd71b54d915b46162e34b780ba2467d5542115cc809dbca60b394c0e +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bd30529614c5be239cd9477af6bef0e313740b6 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11d982dcd813e82c2d97a5491ce9624cff2dd22e8655ea617ccef1fc1474470 +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bed311094effd49cc2c89237c675f56eade157d1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73494fac3a001cba7cedd097b97f028d4c1d136ee6709214b0a7fe305e5b9089 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38b1a376e7c81e3c533cf8a69ddf4eefa9d1336c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0767a9fe84680a5a8a76633a443cb301092115c026c1f5f7f1fbdc53dd7f856f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..efc0dd9d7f6d969ca497c7dcaad46c251b5c28df --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_metric": 0.40771484, + "best_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110", + "epoch": 2.8947368421052633, + "eval_steps": 10, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.70593006577457, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.113961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.176485007231813, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.1298828125, + "logits/rejected": 0.03759765625, + "logps/chosen": -377.25, + "logps/rejected": -512.0, + "loss": 1.8751220703125, + "memory(GiB)": 16.45, + "nll_loss": 1.1748046875, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.01715087890625, + "rewards/margins": 0.0078125, + "rewards/rejected": 0.009368896484375, + "step": 5, + "train_speed(iter/s)": 0.234379 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 12.015147423707822, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07973632961511612, + "logits/rejected": 0.19414062798023224, + "logps/chosen": -462.79998779296875, + "logps/rejected": -507.20001220703125, + "loss": 2.12158203125, + "memory(GiB)": 40.52, + "nll_loss": 1.553125023841858, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.702929675579071, + "rewards/margins": 0.4051757752895355, + "rewards/rejected": 0.2975097596645355, + "step": 10, + "train_speed(iter/s)": 0.254866 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.03125, + 
"eval_logps/chosen": -360.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.19140625, + "eval_nll_loss": 0.796875, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.40625, + "eval_rewards/margins": 1.4765625, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3322, + "eval_samples_per_second": 3.002, + "eval_steps_per_second": 0.751, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 9.41426350911575, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.29057615995407104, + "logits/rejected": 0.11259765923023224, + "logps/chosen": -353.6000061035156, + "logps/rejected": -487.6000061035156, + "loss": 1.001171875, + "memory(GiB)": 40.52, + "nll_loss": 0.731249988079071, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.364062547683716, + "rewards/margins": 2.234375, + "rewards/rejected": 1.1257812976837158, + "step": 15, + "train_speed(iter/s)": 0.266928 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.417766982037976, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8324218988418579, + "logits/rejected": 0.4007812440395355, + "logps/chosen": -258.20001220703125, + "logps/rejected": -377.6000061035156, + "loss": 1.1167236328125, + "memory(GiB)": 40.52, + "nll_loss": 0.922656238079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.8125, + "rewards/margins": 3.262500047683716, + "rewards/rejected": 3.543750047683716, + "step": 20, + "train_speed(iter/s)": 0.275171 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.0859375, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.666015625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.9375, + "eval_rewards/rejected": 4.78125, + "eval_runtime": 1.3177, + "eval_samples_per_second": 3.036, + "eval_steps_per_second": 0.759, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + 
"grad_norm": 2.721982515360601, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.3814453184604645, + "logits/rejected": 0.09628906100988388, + "logps/chosen": -373.6000061035156, + "logps/rejected": -422.20001220703125, + "loss": 0.673486328125, + "memory(GiB)": 40.52, + "nll_loss": 0.600781261920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.074999809265137, + "rewards/margins": 4.393750190734863, + "rewards/rejected": 4.684374809265137, + "step": 25, + "train_speed(iter/s)": 0.274036 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9866084312671491, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5782226324081421, + "logits/rejected": 0.4605468809604645, + "logps/chosen": -250.6999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.50615234375, + "memory(GiB)": 40.52, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.949999809265137, + "rewards/margins": 6.068749904632568, + "rewards/rejected": 4.884375095367432, + "step": 30, + "train_speed(iter/s)": 0.277362 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.078125, + "eval_logits/rejected": 1.203125, + "eval_logps/chosen": -272.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49609375, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.3125, + "eval_rewards/margins": 7.5, + "eval_rewards/rejected": 3.78125, + "eval_runtime": 1.3623, + "eval_samples_per_second": 2.936, + "eval_steps_per_second": 0.734, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 1.2141834186198552, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.512499988079071, + "logits/rejected": 0.31171876192092896, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.4916259765625, + "memory(GiB)": 40.52, + "nll_loss": 0.4892578125, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.012499809265137, + "rewards/margins": 
8.487500190734863, + "rewards/rejected": 3.543750047683716, + "step": 35, + "train_speed(iter/s)": 0.275044 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0755142345948945, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.24981689453125, + "logits/rejected": 0.541796863079071, + "logps/chosen": -271.3999938964844, + "logps/rejected": -482.3999938964844, + "loss": 0.45865478515625, + "memory(GiB)": 40.52, + "nll_loss": 0.563281238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.9375, + "rewards/margins": 9.762499809265137, + "rewards/rejected": 2.1859374046325684, + "step": 40, + "train_speed(iter/s)": 0.278234 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7734375, + "eval_logits/rejected": 1.65625, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.4462890625, + "eval_nll_loss": 0.4453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.0, + "eval_runtime": 1.291, + "eval_samples_per_second": 3.098, + "eval_steps_per_second": 0.775, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6599652846962414, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.24863281846046448, + "logits/rejected": 0.8564453125, + "logps/chosen": -306.20001220703125, + "logps/rejected": -480.0, + "loss": 0.44638671875, + "memory(GiB)": 40.52, + "nll_loss": 0.44648438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.050000190734863, + "rewards/margins": 12.537500381469727, + "rewards/rejected": 1.506250023841858, + "step": 45, + "train_speed(iter/s)": 0.276176 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5744853865576506, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.05844726413488388, + "logits/rejected": 1.05859375, + "logps/chosen": -300.20001220703125, + "logps/rejected": -461.6000061035156, + "loss": 0.434783935546875, + "memory(GiB)": 40.52, + 
"nll_loss": 0.4339843690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.762499809265137, + "rewards/rejected": 2.0621094703674316, + "step": 50, + "train_speed(iter/s)": 0.279108 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5546875, + "eval_logits/rejected": 1.9296875, + "eval_logps/chosen": -242.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.425537109375, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 0.9765625, + "eval_runtime": 1.3278, + "eval_samples_per_second": 3.013, + "eval_steps_per_second": 0.753, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2926328226891597, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1898193359375, + "logits/rejected": 1.259374976158142, + "logps/chosen": -261.20001220703125, + "logps/rejected": -431.20001220703125, + "loss": 0.40357666015625, + "memory(GiB)": 40.52, + "nll_loss": 0.40234375, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.324999809265137, + "rewards/margins": 12.612500190734863, + "rewards/rejected": 1.704687476158142, + "step": 55, + "train_speed(iter/s)": 0.280714 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.24140281258606347, + "learning_rate": 5e-05, + "logits/chosen": -0.07255859673023224, + "logits/rejected": 0.758007824420929, + "logps/chosen": -263.3999938964844, + "logps/rejected": -405.20001220703125, + "loss": 0.28531494140625, + "memory(GiB)": 40.52, + "nll_loss": 0.28496092557907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.337499618530273, + "rewards/margins": 12.774999618530273, + "rewards/rejected": 2.5546875, + "step": 60, + "train_speed(iter/s)": 0.283696 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.40625, + "eval_logits/rejected": 1.96875, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -498.0, + "eval_loss": 
0.425048828125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.5625, + "eval_rewards/rejected": 0.80078125, + "eval_runtime": 1.3237, + "eval_samples_per_second": 3.022, + "eval_steps_per_second": 0.755, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6094457836784298, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.5101562738418579, + "logits/rejected": 1.0482909679412842, + "logps/chosen": -261.0, + "logps/rejected": -409.6000061035156, + "loss": 0.3310546875, + "memory(GiB)": 40.52, + "nll_loss": 0.33125001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.774999618530273, + "rewards/margins": 14.050000190734863, + "rewards/rejected": 1.736328125, + "step": 65, + "train_speed(iter/s)": 0.283196 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.45260618201827363, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.03581542894244194, + "logits/rejected": 1.0457031726837158, + "logps/chosen": -193.0, + "logps/rejected": -501.6000061035156, + "loss": 0.32975921630859373, + "memory(GiB)": 40.52, + "nll_loss": 0.3296875059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 2.176562547683716, + "step": 70, + "train_speed(iter/s)": 0.283818 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.3203125, + "eval_logits/rejected": 1.9765625, + "eval_logps/chosen": -237.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.41455078125, + "eval_nll_loss": 0.4140625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.75, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": 0.6796875, + "eval_runtime": 1.3685, + "eval_samples_per_second": 2.923, + "eval_steps_per_second": 0.731, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.7812661467833826, + "learning_rate": 2.886908691296504e-05, + 
"logits/chosen": -0.42255860567092896, + "logits/rejected": 1.28125, + "logps/chosen": -298.0, + "logps/rejected": -456.79998779296875, + "loss": 0.40250244140625, + "memory(GiB)": 40.52, + "nll_loss": 0.4027343690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.800000190734863, + "rewards/margins": 15.125, + "rewards/rejected": 0.692187488079071, + "step": 75, + "train_speed(iter/s)": 0.283316 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5488588034556375, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.18242187798023224, + "logits/rejected": 1.212890625, + "logps/chosen": -239.39999389648438, + "logps/rejected": -437.20001220703125, + "loss": 0.3640625, + "memory(GiB)": 40.52, + "nll_loss": 0.38066405057907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.3125, + "rewards/margins": 13.925000190734863, + "rewards/rejected": 1.3738281726837158, + "step": 80, + "train_speed(iter/s)": 0.282344 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.3046875, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.410400390625, + "eval_nll_loss": 0.41015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.375, + "eval_runtime": 1.3645, + "eval_samples_per_second": 2.931, + "eval_steps_per_second": 0.733, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5977630685878709, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2928222715854645, + "logits/rejected": 1.396875023841858, + "logps/chosen": -250.60000610351562, + "logps/rejected": -473.6000061035156, + "loss": 0.339306640625, + "memory(GiB)": 40.52, + "nll_loss": 0.3388671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.162500381469727, + "rewards/margins": 15.362500190734863, + "rewards/rejected": 0.8084961175918579, + "step": 85, + "train_speed(iter/s)": 0.282252 + }, + { + 
"epoch": 2.3684210526315788, + "grad_norm": 0.2955118166701661, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35551756620407104, + "logits/rejected": 0.749218761920929, + "logps/chosen": -254.39999389648438, + "logps/rejected": -422.0, + "loss": 0.3234832763671875, + "memory(GiB)": 40.52, + "nll_loss": 0.3232421875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.237499237060547, + "rewards/margins": 14.3125, + "rewards/rejected": 1.9640624523162842, + "step": 90, + "train_speed(iter/s)": 0.285022 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.5, + "eval_runtime": 1.3391, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.25565176186037897, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -0.21904297173023224, + "logits/rejected": 1.0421874523162842, + "logps/chosen": -312.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.41697998046875, + "memory(GiB)": 40.52, + "nll_loss": 0.41621094942092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.387500762939453, + "rewards/margins": 16.700000762939453, + "rewards/rejected": 0.702099621295929, + "step": 95, + "train_speed(iter/s)": 0.283485 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.44788320009382443, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -0.5326172113418579, + "logits/rejected": 1.3898437023162842, + "logps/chosen": -245.89999389648438, + "logps/rejected": -406.79998779296875, + "loss": 0.351910400390625, + "memory(GiB)": 49.77, + "nll_loss": 0.35175782442092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.462499618530273, + 
"rewards/margins": 15.600000381469727, + "rewards/rejected": 0.850390613079071, + "step": 100, + "train_speed(iter/s)": 0.28525 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.408935546875, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.6015625, + "eval_runtime": 1.3384, + "eval_samples_per_second": 2.989, + "eval_steps_per_second": 0.747, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.7897418622893607, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -0.3160156309604645, + "logits/rejected": 1.130468726158142, + "logps/chosen": -228.39999389648438, + "logps/rejected": -440.79998779296875, + "loss": 0.316326904296875, + "memory(GiB)": 49.77, + "nll_loss": 0.3160156309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.012500762939453, + "rewards/margins": 13.800000190734863, + "rewards/rejected": 2.2035155296325684, + "step": 105, + "train_speed(iter/s)": 0.284651 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.45474075769771194, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -0.35332030057907104, + "logits/rejected": 1.181249976158142, + "logps/chosen": -212.39999389648438, + "logps/rejected": -443.6000061035156, + "loss": 0.31457977294921874, + "memory(GiB)": 49.77, + "nll_loss": 0.314453125, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.824999809265137, + "rewards/margins": 13.699999809265137, + "rewards/rejected": 2.114062547683716, + "step": 110, + "train_speed(iter/s)": 0.285606 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.40771484375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + 
"eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.5, + "eval_runtime": 1.3701, + "eval_samples_per_second": 2.92, + "eval_steps_per_second": 0.73, + "step": 110 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 140021774417920.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e926cd924b33a09827843ef082da1793fb49fece --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512ce35c3cb0d098001365fc17b658c77df2485cba0600e08e62a9d5e66bbbec +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-110/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = 
os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = 
state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensors instead of torch tensors, which is more memory efficient. + Convert a pseudo tensor to a torch tensor by calling ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b60a9ad0460c3988479843eac97361671f44930 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4417aee64aa8099181654d3e9316dc87f746b10a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19031aeecfc1700d4cc10ba8a6478ae2146470d24a974dd247a09655d0eda6dd +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..abf6bbaf2688cf3210ff53d87ff14d0ddce51227 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..238ae393df7fd01cb0b74b067cdf71ceaeda9380 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b9f8d534e31cc6ba812ecde774e736e3afa0c1f4b7844ee0d6a4148c0b9424 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd21f195e9cb25e9303a7d9bfb2083ea4932f3a0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf9bb60a769f1111c2f2672d87b77f1bffa049d7235f681ce77ddf6cfc00340 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..faac1fccf6567018a4498a45e8835ff2cc4c2d2f --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81aa75fa3b35a148ec2e4e6d994afc344d2e81d4a69cf86db9814532f420eff0 +size 30281648 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6751d0e5e1c95f6e19a78941cabafc3f20652288 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b904a60d53c84be787884253a355aa95902127e638b751838d052a940595e29e +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f56d544d787127055e2b9e4b382110df7c362a62 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:782d196dbcdc3ca762ed117510166dea03f4cd0b08bfe478f6a03c5e62b7ed1b +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5e3e1ed2661aa65af8b57065aec61c414438d229 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d2cd5c8ecf479f00a809727a18642b72a1083c3e69f151c103adef17a91d9c2 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aaffa3f1080669f8284b5a862de49a3472c54c40 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221eb556d33905341233978602f1593223dea028edf7aa5d88d689600de50fcf +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fbef86fd493144b497acc3f99453f1e4c88b9218 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c83e22b68fa7017b1647d292ba48011256c3c4c897008b0523ae52521369c303 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8105dae45351bc9cd8b211c09f37bb5810d95eec --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b7abcceb9145df5a06a246581437a97934a2f8604dbf3c79a9a24c8810082f +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fe29e9f02f4c93f6a4a80ed47ee493663ea2869 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:047538f1cc9f7c5ced220fbb45f0ca9f60490275935b59b4ffa604e31036970b +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73562b6988c82a4a8c844c641c8f3d6f330e77ed --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ca18f7097ff28a90d8330b59dff6517f5dd7fcf4d416ce0ca9e03963aeff92 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..753f6a2f1281abee13b649b05f5562881ad567c0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45b61d8ca6f2fb536ccc41020c68cdc99525321ad7e8f0559c0cd39e64ec647 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c5678c679c42abe517d73591596fdc55c4ae36c --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1544cf6f6b337811d0d66e2ab7c960f567172e6768154013d3ad5bf104780e7 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..645d841d3b4fc8aec42bb33a8e158e33118a9e86 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50d991fd917a72eecd18f87f0933d1de6f0f5d4868c46317960defa5318a7ac1 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc20e3e0e7f464801f035b049d529536f9456b86 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1788ae209de6f083ea5e6d315e60f7ca77e5e12c3bfb1af3c47049d284bc1fe3 +size 388374 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16318a8152fc653ccbb55a440f9c7789db0458bf --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/global_step114/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338e769473a5bd5e6a6e71459d5ff3a55f43595da5cc253baed8259c1dcd7cb9 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/latest new file mode 100644 index 0000000000000000000000000000000000000000..aad80f76777fd4d23b0b81026f4601524335cbe1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/latest @@ -0,0 +1 @@ +global_step114 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..80f51268a9828e9592a20d8ae8b2cd4ba4bc362c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194 +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..62e581603d525611f9660b6e859462f72bbc9258 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..18b03e632222a58e33ea4fca874b9c52628cc5e1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..41735b5f7ace13ffa57ebed3e7042f1a48ac17fb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebbb93c1d99b1645075ea27fc9fae66992a691f5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0656f84b77a33c9ceba9df16f36437b55ef71bc7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..32b3a313372ee4a2eeaeed69789f8fb4e2c70ad0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d +size 15984 diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c5a45264129fe1d7c409a6867de1a9751476a8e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a86ac614a477eb67963adb2c8c07f37c79ded059 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7a9fd18bda7faa50931342147a7de5605bed0f91f6c70d821e84b7bf8f444f +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..84481598cc849289d6eb2d5d49e745323b65c621 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/trainer_state.json @@ -0,0 +1,651 @@ +{ + "best_metric": 0.40698242, + "best_model_checkpoint": 
"/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.70593006577457, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, + "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.113961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.176485007231813, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.1298828125, + "logits/rejected": 0.03759765625, + "logps/chosen": -377.25, + "logps/rejected": -512.0, + "loss": 1.8751220703125, + "memory(GiB)": 16.45, + "nll_loss": 1.1748046875, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.01715087890625, + "rewards/margins": 0.0078125, + "rewards/rejected": 0.009368896484375, + "step": 5, + "train_speed(iter/s)": 0.234379 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 12.015147423707822, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07973632961511612, + "logits/rejected": 0.19414062798023224, + "logps/chosen": -462.79998779296875, + "logps/rejected": -507.20001220703125, + "loss": 2.12158203125, + "memory(GiB)": 40.52, + "nll_loss": 1.553125023841858, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.702929675579071, + "rewards/margins": 0.4051757752895355, + "rewards/rejected": 0.2975097596645355, + "step": 10, + "train_speed(iter/s)": 0.254866 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -360.0, + 
"eval_logps/rejected": -496.0, + "eval_loss": 1.19140625, + "eval_nll_loss": 0.796875, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.40625, + "eval_rewards/margins": 1.4765625, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3322, + "eval_samples_per_second": 3.002, + "eval_steps_per_second": 0.751, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 9.41426350911575, + "learning_rate": 9.829629131445342e-05, + "logits/chosen": -0.29057615995407104, + "logits/rejected": 0.11259765923023224, + "logps/chosen": -353.6000061035156, + "logps/rejected": -487.6000061035156, + "loss": 1.001171875, + "memory(GiB)": 40.52, + "nll_loss": 0.731249988079071, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.364062547683716, + "rewards/margins": 2.234375, + "rewards/rejected": 1.1257812976837158, + "step": 15, + "train_speed(iter/s)": 0.266928 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.417766982037976, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8324218988418579, + "logits/rejected": 0.4007812440395355, + "logps/chosen": -258.20001220703125, + "logps/rejected": -377.6000061035156, + "loss": 1.1167236328125, + "memory(GiB)": 40.52, + "nll_loss": 0.922656238079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.8125, + "rewards/margins": 3.262500047683716, + "rewards/rejected": 3.543750047683716, + "step": 20, + "train_speed(iter/s)": 0.275171 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.0859375, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.666015625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.9375, + "eval_rewards/rejected": 4.78125, + "eval_runtime": 1.3177, + "eval_samples_per_second": 3.036, + "eval_steps_per_second": 0.759, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.721982515360601, + 
"learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.3814453184604645, + "logits/rejected": 0.09628906100988388, + "logps/chosen": -373.6000061035156, + "logps/rejected": -422.20001220703125, + "loss": 0.673486328125, + "memory(GiB)": 40.52, + "nll_loss": 0.600781261920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.074999809265137, + "rewards/margins": 4.393750190734863, + "rewards/rejected": 4.684374809265137, + "step": 25, + "train_speed(iter/s)": 0.274036 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9866084312671491, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5782226324081421, + "logits/rejected": 0.4605468809604645, + "logps/chosen": -250.6999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.50615234375, + "memory(GiB)": 40.52, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.949999809265137, + "rewards/margins": 6.068749904632568, + "rewards/rejected": 4.884375095367432, + "step": 30, + "train_speed(iter/s)": 0.277362 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.078125, + "eval_logits/rejected": 1.203125, + "eval_logps/chosen": -272.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49609375, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.3125, + "eval_rewards/margins": 7.5, + "eval_rewards/rejected": 3.78125, + "eval_runtime": 1.3623, + "eval_samples_per_second": 2.936, + "eval_steps_per_second": 0.734, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 1.2141834186198552, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.512499988079071, + "logits/rejected": 0.31171876192092896, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.4916259765625, + "memory(GiB)": 40.52, + "nll_loss": 0.4892578125, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.012499809265137, + "rewards/margins": 8.487500190734863, + "rewards/rejected": 
3.543750047683716, + "step": 35, + "train_speed(iter/s)": 0.275044 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0755142345948945, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.24981689453125, + "logits/rejected": 0.541796863079071, + "logps/chosen": -271.3999938964844, + "logps/rejected": -482.3999938964844, + "loss": 0.45865478515625, + "memory(GiB)": 40.52, + "nll_loss": 0.563281238079071, + "rewards/accuracies": 1.0, + "rewards/chosen": 11.9375, + "rewards/margins": 9.762499809265137, + "rewards/rejected": 2.1859374046325684, + "step": 40, + "train_speed(iter/s)": 0.278234 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7734375, + "eval_logits/rejected": 1.65625, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.4462890625, + "eval_nll_loss": 0.4453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.0, + "eval_runtime": 1.291, + "eval_samples_per_second": 3.098, + "eval_steps_per_second": 0.775, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6599652846962414, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.24863281846046448, + "logits/rejected": 0.8564453125, + "logps/chosen": -306.20001220703125, + "logps/rejected": -480.0, + "loss": 0.44638671875, + "memory(GiB)": 40.52, + "nll_loss": 0.44648438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.050000190734863, + "rewards/margins": 12.537500381469727, + "rewards/rejected": 1.506250023841858, + "step": 45, + "train_speed(iter/s)": 0.276176 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5744853865576506, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.05844726413488388, + "logits/rejected": 1.05859375, + "logps/chosen": -300.20001220703125, + "logps/rejected": -461.6000061035156, + "loss": 0.434783935546875, + "memory(GiB)": 40.52, + "nll_loss": 0.4339843690395355, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.762499809265137, + "rewards/rejected": 2.0621094703674316, + "step": 50, + "train_speed(iter/s)": 0.279108 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5546875, + "eval_logits/rejected": 1.9296875, + "eval_logps/chosen": -242.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.425537109375, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 0.9765625, + "eval_runtime": 1.3278, + "eval_samples_per_second": 3.013, + "eval_steps_per_second": 0.753, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2926328226891597, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1898193359375, + "logits/rejected": 1.259374976158142, + "logps/chosen": -261.20001220703125, + "logps/rejected": -431.20001220703125, + "loss": 0.40357666015625, + "memory(GiB)": 40.52, + "nll_loss": 0.40234375, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.324999809265137, + "rewards/margins": 12.612500190734863, + "rewards/rejected": 1.704687476158142, + "step": 55, + "train_speed(iter/s)": 0.280714 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.24140281258606347, + "learning_rate": 5e-05, + "logits/chosen": -0.07255859673023224, + "logits/rejected": 0.758007824420929, + "logps/chosen": -263.3999938964844, + "logps/rejected": -405.20001220703125, + "loss": 0.28531494140625, + "memory(GiB)": 40.52, + "nll_loss": 0.28496092557907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.337499618530273, + "rewards/margins": 12.774999618530273, + "rewards/rejected": 2.5546875, + "step": 60, + "train_speed(iter/s)": 0.283696 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.40625, + "eval_logits/rejected": 1.96875, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.425048828125, + "eval_nll_loss": 0.42578125, + 
"eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.5625, + "eval_rewards/rejected": 0.80078125, + "eval_runtime": 1.3237, + "eval_samples_per_second": 3.022, + "eval_steps_per_second": 0.755, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6094457836784298, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.5101562738418579, + "logits/rejected": 1.0482909679412842, + "logps/chosen": -261.0, + "logps/rejected": -409.6000061035156, + "loss": 0.3310546875, + "memory(GiB)": 40.52, + "nll_loss": 0.33125001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.774999618530273, + "rewards/margins": 14.050000190734863, + "rewards/rejected": 1.736328125, + "step": 65, + "train_speed(iter/s)": 0.283196 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.45260618201827363, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.03581542894244194, + "logits/rejected": 1.0457031726837158, + "logps/chosen": -193.0, + "logps/rejected": -501.6000061035156, + "loss": 0.32975921630859373, + "memory(GiB)": 40.52, + "nll_loss": 0.3296875059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 2.176562547683716, + "step": 70, + "train_speed(iter/s)": 0.283818 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.3203125, + "eval_logits/rejected": 1.9765625, + "eval_logps/chosen": -237.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.41455078125, + "eval_nll_loss": 0.4140625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.75, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": 0.6796875, + "eval_runtime": 1.3685, + "eval_samples_per_second": 2.923, + "eval_steps_per_second": 0.731, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.7812661467833826, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.42255860567092896, + 
"logits/rejected": 1.28125, + "logps/chosen": -298.0, + "logps/rejected": -456.79998779296875, + "loss": 0.40250244140625, + "memory(GiB)": 40.52, + "nll_loss": 0.4027343690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.800000190734863, + "rewards/margins": 15.125, + "rewards/rejected": 0.692187488079071, + "step": 75, + "train_speed(iter/s)": 0.283316 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5488588034556375, + "learning_rate": 2.25245510964597e-05, + "logits/chosen": -0.18242187798023224, + "logits/rejected": 1.212890625, + "logps/chosen": -239.39999389648438, + "logps/rejected": -437.20001220703125, + "loss": 0.3640625, + "memory(GiB)": 40.52, + "nll_loss": 0.38066405057907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.3125, + "rewards/margins": 13.925000190734863, + "rewards/rejected": 1.3738281726837158, + "step": 80, + "train_speed(iter/s)": 0.282344 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.3046875, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.410400390625, + "eval_nll_loss": 0.41015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.375, + "eval_runtime": 1.3645, + "eval_samples_per_second": 2.931, + "eval_steps_per_second": 0.733, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5977630685878709, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2928222715854645, + "logits/rejected": 1.396875023841858, + "logps/chosen": -250.60000610351562, + "logps/rejected": -473.6000061035156, + "loss": 0.339306640625, + "memory(GiB)": 40.52, + "nll_loss": 0.3388671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.162500381469727, + "rewards/margins": 15.362500190734863, + "rewards/rejected": 0.8084961175918579, + "step": 85, + "train_speed(iter/s)": 0.282252 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 
0.2955118166701661, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35551756620407104, + "logits/rejected": 0.749218761920929, + "logps/chosen": -254.39999389648438, + "logps/rejected": -422.0, + "loss": 0.3234832763671875, + "memory(GiB)": 40.52, + "nll_loss": 0.3232421875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.237499237060547, + "rewards/margins": 14.3125, + "rewards/rejected": 1.9640624523162842, + "step": 90, + "train_speed(iter/s)": 0.285022 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.5, + "eval_runtime": 1.3391, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + }, + { + "epoch": 2.5, + "grad_norm": 0.25565176186037897, + "learning_rate": 7.444166378150013e-06, + "logits/chosen": -0.21904297173023224, + "logits/rejected": 1.0421874523162842, + "logps/chosen": -312.6000061035156, + "logps/rejected": -527.5999755859375, + "loss": 0.41697998046875, + "memory(GiB)": 40.52, + "nll_loss": 0.41621094942092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 17.387500762939453, + "rewards/margins": 16.700000762939453, + "rewards/rejected": 0.702099621295929, + "step": 95, + "train_speed(iter/s)": 0.283485 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 0.44788320009382443, + "learning_rate": 4.089194655986306e-06, + "logits/chosen": -0.5326172113418579, + "logits/rejected": 1.3898437023162842, + "logps/chosen": -245.89999389648438, + "logps/rejected": -406.79998779296875, + "loss": 0.351910400390625, + "memory(GiB)": 49.77, + "nll_loss": 0.35175782442092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.462499618530273, + "rewards/margins": 15.600000381469727, + 
"rewards/rejected": 0.850390613079071, + "step": 100, + "train_speed(iter/s)": 0.28525 + }, + { + "epoch": 2.6315789473684212, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.408935546875, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5, + "eval_rewards/rejected": 0.6015625, + "eval_runtime": 1.3384, + "eval_samples_per_second": 2.989, + "eval_steps_per_second": 0.747, + "step": 100 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 0.7897418622893607, + "learning_rate": 1.70370868554659e-06, + "logits/chosen": -0.3160156309604645, + "logits/rejected": 1.130468726158142, + "logps/chosen": -228.39999389648438, + "logps/rejected": -440.79998779296875, + "loss": 0.316326904296875, + "memory(GiB)": 49.77, + "nll_loss": 0.3160156309604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.012500762939453, + "rewards/margins": 13.800000190734863, + "rewards/rejected": 2.2035155296325684, + "step": 105, + "train_speed(iter/s)": 0.284651 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 0.45474075769771194, + "learning_rate": 3.380821129028489e-07, + "logits/chosen": -0.35332030057907104, + "logits/rejected": 1.181249976158142, + "logps/chosen": -212.39999389648438, + "logps/rejected": -443.6000061035156, + "loss": 0.31457977294921874, + "memory(GiB)": 49.77, + "nll_loss": 0.314453125, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.824999809265137, + "rewards/margins": 13.699999809265137, + "rewards/rejected": 2.114062547683716, + "step": 110, + "train_speed(iter/s)": 0.285606 + }, + { + "epoch": 2.8947368421052633, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.40771484375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + 
"eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.5, + "eval_runtime": 1.3701, + "eval_samples_per_second": 2.92, + "eval_steps_per_second": 0.73, + "step": 110 + }, + { + "epoch": 3.0, + "eval_logits/chosen": -1.2578125, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.406982421875, + "eval_nll_loss": 0.40625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.125, + "eval_rewards/margins": 14.625, + "eval_rewards/rejected": 0.4765625, + "eval_runtime": 1.2905, + "eval_samples_per_second": 3.1, + "eval_steps_per_second": 0.775, + "step": 114 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 145005522976768.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e926cd924b33a09827843ef082da1793fb49fece --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512ce35c3cb0d098001365fc17b658c77df2485cba0600e08e62a9d5e66bbbec +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/zero_to_fp32.py 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b60a9ad0460c3988479843eac97361671f44930 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd1c6102790aa2bf54385407aa55a911be4f3d58 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93672b754b2c8907b6058a7b49d95e54588a37a7293bf018ffceffec15f65d3c +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..abf6bbaf2688cf3210ff53d87ff14d0ddce51227 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6e0e667d747caf0c316f1c46a08d6c81d5baf68 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bde8fa16a5dc2c188d86646d53ac37ed16ab10f2665351378933bc7acf06b5 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..61adc8ff1ec9334ca349ce6f15cde1aab37a4d90 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a7c1f6e2f606ae47b69c3db9a1fc8486275eecb79b3669081f112b685c3115 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d19862a05d9c1ad157770dde3159dd5055b0571 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a031d64a705acaf3026cf77a72050830c1087c771719d979719330baaf4f6d +size 30281648 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c2b6191386c84098b6a65db1563dac736ef00ce --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f69ea94c2463810e3bcd9f83a9f6b0b55cdd0e0f71469ce57d7882b06df9eb +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..13ce622e4f63b96fe09a697f3476665590f977ca --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842886c4943bd59019ddf0f068cb7ea0490b4b49935f87463e60cad962bd981e +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..a3bc340f6f66fbe991acd5cdf220810f6ed7118a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45667398abea9b21777a50e0e29831715a379c6611995ab49e4efb6ba1c25cdd +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e7f67fcc9923d7c6d691ea320357a1bfd65cf23 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fab52fef2f2842947b22eca4e629e9af5e1dee91f1355d3681c5c17308efd77 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c5d86e1b78902c1edf96c2aeb12fd0970dbf9dc --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:35636a19e5f5b7e352bb8f2a336dc528617406ab818e3eb520ff95252f1ad045 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..907119852928ed09d6e797d0c8b6b6afbcc39c47 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41be4a3da2e1d9ada88a564e57e68a179d0f4d3015921aa43b816bb2db11d812 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b03895f022afa69ae39102dc977acb8a69da371b --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:903a995694e42d740831ac0f804c917f6d94d6256abc61366d53140608274561 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..f1f2797a5d6a72fc2ee526fad3243809fc1b0e93 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d612f5cc1f04060a662b881b69eef2573f37a9f123991fb2e4ec8fbf75090f7 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e09da547b800b7ee1fdaf3d0cdba50cd76f720eb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a36f568d392a165e7cdfd23a674624aff885b7e542c817dc85f17338f3d031f +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c21c94cd3a45200995942b6f7205f6116c015b2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1975d338ad77bacca2045078cb8c938feef232709154e081c7ec29ef6c38ed9 +size 
388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53218daad5d247dd2ba46b15cb2c014627ad35cb --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b998f129ff41d54973b5648e13a69503899063871132189954c90d31c6f14900 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..46f4c0dd4b1b825f665721be91ae4d1980f46212 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07456b371d001f8e5eeeffeafa980aec54e12c55c9485335fd694f01abae179 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..1f1bba5eeb8a975ae06eef0bdfce3b1800293178 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/global_step80/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e62e8db390fea28ab5c6c90f21d525c6ee93880a05ab99e7a37800e65ef5a34 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/latest new file mode 100644 index 0000000000000000000000000000000000000000..75eab498d0366633484ab40334e4b8fb92b16dad --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/latest @@ -0,0 +1 @@ +global_step80 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b8b4067e4559b34f9b554c4963fe80d7f5fe839 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4c26c615bd5830d41566fab54dc69174be292761b34514b27fbe82b45b630b +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0265e51b5761ac9b323aa87ba00ba14b97e202 --- 
/dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c761d7f9b90c29c2d348a1133fd39be52c65e6bee4c2d179f6a6e564eb3a40 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5dd5aabcd6e7332f14a4796d6ec6c758e10aea0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccab847cc956e055fd3f9dcce06898826d065211e945b83576c8d487f87c5469 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcbdea3a573c2b7717f23e2ea0e4a6da6670d65d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e5f1dbdcf6ec820c22fd1e4258fcd7af2a2bce65c480988d3f111aa574c9c06 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cdcb8d1710063a6c30dec635b4c44e3cb6cd24e --- /dev/null 
+++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a23184c3e806d2649776427d1da2c0c9137f9b23a84468f3bdd5bbc75f696c9 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e39323a662c284cd109b5ce8c39e8a0ce375f2c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382fc01b809542bf6f5e26742e3e19e80a1f189ac5de24cf8cd822e303916b83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ce685d2e57181f70debfb25eb90cb76ceaf47da --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b178265c7d2ae07bff10b7312e5e49b9f5b4914c38969d2f64a6ca006296bca +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e363b8083cdd817e0b3a2e6fd1b65a905e189b --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668825a859126c4cf32afb883895c91004130b6aee02178736ca2840e5429ad0 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaf96d6803aea265d756d902db3c4cc2386f9742 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90524bcdb94734ac7120e4205110f14662bff8cee00eed50355875dcdc538029 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b9f25d5251e1a4490b64b90eda5bd94255e8c68 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/trainer_state.json @@ -0,0 +1,475 @@ +{ + "best_metric": 0.41040039, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80", + "epoch": 2.1052631578947367, + "eval_steps": 10, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.70593006577457, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, 
+ "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.113961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.176485007231813, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.1298828125, + "logits/rejected": 0.03759765625, + "logps/chosen": -377.25, + "logps/rejected": -512.0, + "loss": 1.8751220703125, + "memory(GiB)": 16.45, + "nll_loss": 1.1748046875, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.01715087890625, + "rewards/margins": 0.0078125, + "rewards/rejected": 0.009368896484375, + "step": 5, + "train_speed(iter/s)": 0.234379 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 12.015147423707822, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07973632961511612, + "logits/rejected": 0.19414062798023224, + "logps/chosen": -462.79998779296875, + "logps/rejected": -507.20001220703125, + "loss": 2.12158203125, + "memory(GiB)": 40.52, + "nll_loss": 1.553125023841858, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.702929675579071, + "rewards/margins": 0.4051757752895355, + "rewards/rejected": 0.2975097596645355, + "step": 10, + "train_speed(iter/s)": 0.254866 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -360.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.19140625, + "eval_nll_loss": 0.796875, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.40625, + "eval_rewards/margins": 1.4765625, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3322, + "eval_samples_per_second": 3.002, + "eval_steps_per_second": 0.751, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 9.41426350911575, + "learning_rate": 9.829629131445342e-05, + 
"logits/chosen": -0.29057615995407104, + "logits/rejected": 0.11259765923023224, + "logps/chosen": -353.6000061035156, + "logps/rejected": -487.6000061035156, + "loss": 1.001171875, + "memory(GiB)": 40.52, + "nll_loss": 0.731249988079071, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.364062547683716, + "rewards/margins": 2.234375, + "rewards/rejected": 1.1257812976837158, + "step": 15, + "train_speed(iter/s)": 0.266928 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.417766982037976, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8324218988418579, + "logits/rejected": 0.4007812440395355, + "logps/chosen": -258.20001220703125, + "logps/rejected": -377.6000061035156, + "loss": 1.1167236328125, + "memory(GiB)": 40.52, + "nll_loss": 0.922656238079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.8125, + "rewards/margins": 3.262500047683716, + "rewards/rejected": 3.543750047683716, + "step": 20, + "train_speed(iter/s)": 0.275171 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.0859375, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.666015625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.9375, + "eval_rewards/rejected": 4.78125, + "eval_runtime": 1.3177, + "eval_samples_per_second": 3.036, + "eval_steps_per_second": 0.759, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.721982515360601, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.3814453184604645, + "logits/rejected": 0.09628906100988388, + "logps/chosen": -373.6000061035156, + "logps/rejected": -422.20001220703125, + "loss": 0.673486328125, + "memory(GiB)": 40.52, + "nll_loss": 0.600781261920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.074999809265137, + "rewards/margins": 4.393750190734863, + "rewards/rejected": 4.684374809265137, + "step": 25, + 
"train_speed(iter/s)": 0.274036 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9866084312671491, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5782226324081421, + "logits/rejected": 0.4605468809604645, + "logps/chosen": -250.6999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.50615234375, + "memory(GiB)": 40.52, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.949999809265137, + "rewards/margins": 6.068749904632568, + "rewards/rejected": 4.884375095367432, + "step": 30, + "train_speed(iter/s)": 0.277362 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.078125, + "eval_logits/rejected": 1.203125, + "eval_logps/chosen": -272.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49609375, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.3125, + "eval_rewards/margins": 7.5, + "eval_rewards/rejected": 3.78125, + "eval_runtime": 1.3623, + "eval_samples_per_second": 2.936, + "eval_steps_per_second": 0.734, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 1.2141834186198552, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.512499988079071, + "logits/rejected": 0.31171876192092896, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.4916259765625, + "memory(GiB)": 40.52, + "nll_loss": 0.4892578125, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.012499809265137, + "rewards/margins": 8.487500190734863, + "rewards/rejected": 3.543750047683716, + "step": 35, + "train_speed(iter/s)": 0.275044 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0755142345948945, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.24981689453125, + "logits/rejected": 0.541796863079071, + "logps/chosen": -271.3999938964844, + "logps/rejected": -482.3999938964844, + "loss": 0.45865478515625, + "memory(GiB)": 40.52, + "nll_loss": 0.563281238079071, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 11.9375, + "rewards/margins": 9.762499809265137, + "rewards/rejected": 2.1859374046325684, + "step": 40, + "train_speed(iter/s)": 0.278234 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7734375, + "eval_logits/rejected": 1.65625, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.4462890625, + "eval_nll_loss": 0.4453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.0, + "eval_runtime": 1.291, + "eval_samples_per_second": 3.098, + "eval_steps_per_second": 0.775, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6599652846962414, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.24863281846046448, + "logits/rejected": 0.8564453125, + "logps/chosen": -306.20001220703125, + "logps/rejected": -480.0, + "loss": 0.44638671875, + "memory(GiB)": 40.52, + "nll_loss": 0.44648438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.050000190734863, + "rewards/margins": 12.537500381469727, + "rewards/rejected": 1.506250023841858, + "step": 45, + "train_speed(iter/s)": 0.276176 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5744853865576506, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.05844726413488388, + "logits/rejected": 1.05859375, + "logps/chosen": -300.20001220703125, + "logps/rejected": -461.6000061035156, + "loss": 0.434783935546875, + "memory(GiB)": 40.52, + "nll_loss": 0.4339843690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.762499809265137, + "rewards/rejected": 2.0621094703674316, + "step": 50, + "train_speed(iter/s)": 0.279108 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5546875, + "eval_logits/rejected": 1.9296875, + "eval_logps/chosen": -242.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.425537109375, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + 
"eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 0.9765625, + "eval_runtime": 1.3278, + "eval_samples_per_second": 3.013, + "eval_steps_per_second": 0.753, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2926328226891597, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1898193359375, + "logits/rejected": 1.259374976158142, + "logps/chosen": -261.20001220703125, + "logps/rejected": -431.20001220703125, + "loss": 0.40357666015625, + "memory(GiB)": 40.52, + "nll_loss": 0.40234375, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.324999809265137, + "rewards/margins": 12.612500190734863, + "rewards/rejected": 1.704687476158142, + "step": 55, + "train_speed(iter/s)": 0.280714 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.24140281258606347, + "learning_rate": 5e-05, + "logits/chosen": -0.07255859673023224, + "logits/rejected": 0.758007824420929, + "logps/chosen": -263.3999938964844, + "logps/rejected": -405.20001220703125, + "loss": 0.28531494140625, + "memory(GiB)": 40.52, + "nll_loss": 0.28496092557907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.337499618530273, + "rewards/margins": 12.774999618530273, + "rewards/rejected": 2.5546875, + "step": 60, + "train_speed(iter/s)": 0.283696 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.40625, + "eval_logits/rejected": 1.96875, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.425048828125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.5625, + "eval_rewards/rejected": 0.80078125, + "eval_runtime": 1.3237, + "eval_samples_per_second": 3.022, + "eval_steps_per_second": 0.755, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6094457836784298, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.5101562738418579, + "logits/rejected": 1.0482909679412842, + 
"logps/chosen": -261.0, + "logps/rejected": -409.6000061035156, + "loss": 0.3310546875, + "memory(GiB)": 40.52, + "nll_loss": 0.33125001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.774999618530273, + "rewards/margins": 14.050000190734863, + "rewards/rejected": 1.736328125, + "step": 65, + "train_speed(iter/s)": 0.283196 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.45260618201827363, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.03581542894244194, + "logits/rejected": 1.0457031726837158, + "logps/chosen": -193.0, + "logps/rejected": -501.6000061035156, + "loss": 0.32975921630859373, + "memory(GiB)": 40.52, + "nll_loss": 0.3296875059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 2.176562547683716, + "step": 70, + "train_speed(iter/s)": 0.283818 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.3203125, + "eval_logits/rejected": 1.9765625, + "eval_logps/chosen": -237.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.41455078125, + "eval_nll_loss": 0.4140625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.75, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": 0.6796875, + "eval_runtime": 1.3685, + "eval_samples_per_second": 2.923, + "eval_steps_per_second": 0.731, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.7812661467833826, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.42255860567092896, + "logits/rejected": 1.28125, + "logps/chosen": -298.0, + "logps/rejected": -456.79998779296875, + "loss": 0.40250244140625, + "memory(GiB)": 40.52, + "nll_loss": 0.4027343690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.800000190734863, + "rewards/margins": 15.125, + "rewards/rejected": 0.692187488079071, + "step": 75, + "train_speed(iter/s)": 0.283316 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5488588034556375, + "learning_rate": 
2.25245510964597e-05, + "logits/chosen": -0.18242187798023224, + "logits/rejected": 1.212890625, + "logps/chosen": -239.39999389648438, + "logps/rejected": -437.20001220703125, + "loss": 0.3640625, + "memory(GiB)": 40.52, + "nll_loss": 0.38066405057907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.3125, + "rewards/margins": 13.925000190734863, + "rewards/rejected": 1.3738281726837158, + "step": 80, + "train_speed(iter/s)": 0.282344 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.3046875, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.410400390625, + "eval_nll_loss": 0.41015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.375, + "eval_runtime": 1.3645, + "eval_samples_per_second": 2.931, + "eval_steps_per_second": 0.733, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 102458195968000.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e926cd924b33a09827843ef082da1793fb49fece --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:512ce35c3cb0d098001365fc17b658c77df2485cba0600e08e62a9d5e66bbbec +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-80/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient.
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint.
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder.
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/README.md b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e4e3f02b35c0f2feed1d84197d9fc0806a438243 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: /m2v_intern/wangruotong/logs/Models/deepseek-r1-7b +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More 
Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.14.0 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/adapter_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b60a9ad0460c3988479843eac97361671f44930 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "bias": "none", + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + 
"modules_to_save": [], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "o_proj", + "v_proj", + "k_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/adapter_model.safetensors b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa3b41a5555f636562cbb032502a1eac99e3d8f3 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed0eac61f10b35b28cadd0bdfde533bb374dc1588db72bc5998f0556ce12b9f +size 40422208 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/additional_config.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/additional_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe5159d1d10a158affb4d328c70025d891e16d8 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/additional_config.json @@ -0,0 +1 @@ +{"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/args.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/args.json new file mode 100644 index 
0000000000000000000000000000000000000000..abf6bbaf2688cf3210ff53d87ff14d0ddce51227 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/args.json @@ -0,0 +1,374 @@ +{ + "model": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "model_type": "deepseek_r1_distill", + "model_revision": null, + "task_type": "causal_lm", + "torch_dtype": "bfloat16", + "attn_impl": null, + "num_labels": null, + "rope_scaling": null, + "device_map": null, + "local_repo_path": null, + "template": "deepseek_r1", + "system": null, + "max_length": 4096, + "truncation_strategy": "delete", + "max_pixels": null, + "tools_prompt": "react_en", + "norm_bbox": null, + "padding_side": "right", + "loss_scale": "last_round", + "sequence_parallel_size": 1, + "use_chat_template": true, + "template_backend": "swift", + "dataset": [ + "/home/wangruotong/LLM_test/real-dpo-05-ds.jsonl" + ], + "val_dataset": [], + "split_dataset_ratio": 0.01, + "data_seed": 42, + "dataset_num_proc": 4, + "streaming": false, + "enable_cache": false, + "download_mode": "reuse_dataset_if_exists", + "columns": {}, + "strict": false, + "model_name": [ + null, + null + ], + "model_author": [ + null, + null + ], + "custom_dataset_info": [], + "quant_method": null, + "quant_bits": null, + "hqq_axis": null, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "bnb_4bit_quant_storage": null, + "max_new_tokens": 64, + "temperature": 0.7, + "top_k": null, + "top_p": null, + "repetition_penalty": null, + "num_beams": 1, + "stream": false, + "stop_words": [], + "logprobs": false, + "top_logprobs": null, + "ckpt_dir": null, + "load_dataset_config": null, + "lora_modules": [], + "tuner_backend": "peft", + "train_type": "lora", + "adapters": [], + "seed": 42, + "model_kwargs": {}, + "load_args": true, + "load_data_args": false, + "use_hf": false, + "hub_token": null, + "custom_register_path": [], + 
"ignore_args_error": false, + "use_swift_lora": false, + "output_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144", + "overwrite_output_dir": false, + "do_train": false, + "do_eval": false, + "do_predict": false, + "eval_strategy": "steps", + "prediction_loss_only": false, + "per_device_train_batch_size": 1, + "per_device_eval_batch_size": 1, + "per_gpu_train_batch_size": null, + "per_gpu_eval_batch_size": null, + "gradient_accumulation_steps": 1, + "eval_accumulation_steps": null, + "eval_delay": 0, + "torch_empty_cache_steps": null, + "learning_rate": 0.0001, + "weight_decay": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_epsilon": 1e-08, + "max_grad_norm": 1.0, + "num_train_epochs": 3.0, + "max_steps": -1, + "lr_scheduler_type": "cosine", + "lr_scheduler_kwargs": null, + "warmup_ratio": 0.05, + "warmup_steps": 0, + "log_level": "passive", + "log_level_replica": "warning", + "log_on_each_node": true, + "logging_dir": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs", + "logging_strategy": "steps", + "logging_first_step": true, + "logging_steps": 5, + "logging_nan_inf_filter": true, + "save_strategy": "steps", + "save_steps": 10.0, + "save_total_limit": 5, + "save_safetensors": true, + "save_on_each_node": false, + "save_only_model": false, + "restore_callback_states_from_checkpoint": false, + "no_cuda": false, + "use_cpu": false, + "use_mps_device": false, + "jit_mode_eval": false, + "use_ipex": false, + "bf16": true, + "fp16": false, + "fp16_opt_level": "O1", + "half_precision_backend": "auto", + "bf16_full_eval": false, + "fp16_full_eval": false, + "tf32": null, + "local_rank": 0, + "ddp_backend": null, + "tpu_num_cores": null, + "tpu_metrics_debug": false, + "debug": null, + "dataloader_drop_last": false, + "eval_steps": 10.0, + "dataloader_num_workers": 4, + "dataloader_prefetch_factor": 
null, + "past_index": -1, + "run_name": null, + "disable_tqdm": null, + "remove_unused_columns": false, + "label_names": null, + "load_best_model_at_end": false, + "metric_for_best_model": "loss", + "greater_is_better": false, + "ignore_data_skip": false, + "fsdp": "", + "fsdp_min_num_params": 0, + "fsdp_config": null, + "fsdp_transformer_layer_cls_to_wrap": null, + "accelerator_config": { + "dispatch_batches": false + }, + "deepspeed": { + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "bf16": { + "enabled": "auto" + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none", + "pin_memory": true + }, + "offload_param": { + "device": "none", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1000000000.0, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1000000000.0, + "stage3_max_reuse_distance": 1000000000.0, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "steps_per_print": 2000, + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "wall_clock_breakdown": false + }, + "label_smoothing_factor": 0.0, + "optim": "adamw_torch", + "optim_args": null, + "adafactor": false, + "group_by_length": false, + "length_column_name": "length", + "report_to": [ + "tensorboard" + ], + "ddp_find_unused_parameters": null, + "ddp_bucket_cap_mb": null, + "ddp_broadcast_buffers": null, + "dataloader_pin_memory": true, + "dataloader_persistent_workers": false, + "skip_memory_metrics": true, + "use_legacy_prediction_loop": false, + "push_to_hub": false, + "resume_from_checkpoint": null, + "hub_model_id": null, + "hub_strategy": "every_save", + "hub_private_repo": null, + "hub_always_push": false, 
+ "gradient_checkpointing": true, + "gradient_checkpointing_kwargs": null, + "include_inputs_for_metrics": false, + "include_for_metrics": [], + "eval_do_concat_batches": true, + "fp16_backend": "auto", + "evaluation_strategy": "steps", + "push_to_hub_model_id": null, + "push_to_hub_organization": null, + "push_to_hub_token": null, + "mp_parameters": "", + "auto_find_batch_size": false, + "full_determinism": false, + "torchdynamo": null, + "ray_scope": "last", + "ddp_timeout": 1800, + "torch_compile": false, + "torch_compile_backend": null, + "torch_compile_mode": null, + "dispatch_batches": null, + "split_batches": null, + "include_tokens_per_second": false, + "include_num_input_tokens_seen": false, + "neftune_noise_alpha": null, + "optim_target_modules": null, + "batch_eval_metrics": false, + "eval_on_start": false, + "use_liger_kernel": false, + "eval_use_gather_object": false, + "average_tokens_across_devices": false, + "sortish_sampler": false, + "predict_with_generate": false, + "generation_max_length": null, + "generation_num_beams": null, + "generation_config": null, + "freeze_parameters": [], + "freeze_parameters_ratio": 0.0, + "trainable_parameters": [], + "freeze_llm": false, + "freeze_vit": true, + "freeze_aligner": true, + "target_modules": [ + "all-linear" + ], + "target_regex": null, + "modules_to_save": [], + "lora_rank": 8, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_bias": "none", + "lora_dtype": null, + "lorap_lr_ratio": null, + "use_rslora": false, + "use_dora": false, + "lora_ga_batch_size": 2, + "lora_ga_iters": 2, + "lora_ga_max_length": 1024, + "lora_ga_direction": "ArB2r", + "lora_ga_scale": "stable", + "lora_ga_stable_gamma": 16, + "init_weights": true, + "fourier_n_frequency": 2000, + "fourier_scaling": 300.0, + "boft_block_size": 4, + "boft_block_num": 0, + "boft_n_butterfly_factor": 1, + "boft_dropout": 0.0, + "vera_rank": 256, + "vera_projection_prng_key": 0, + "vera_dropout": 0.0, + "vera_d_initial": 0.1, + "adapter_act": 
"gelu", + "adapter_length": 128, + "use_galore": false, + "galore_target_modules": null, + "galore_rank": 128, + "galore_update_proj_gap": 50, + "galore_scale": 1.0, + "galore_proj_type": "std", + "galore_optim_per_parameter": false, + "galore_with_embedding": false, + "galore_quantization": false, + "galore_proj_quant": false, + "galore_proj_bits": 4, + "galore_proj_group_size": 256, + "galore_cos_threshold": 0.4, + "galore_gamma_proj": 2, + "galore_queue_size": 5, + "adalora_target_r": 8, + "adalora_init_r": 12, + "adalora_tinit": 0, + "adalora_tfinal": 0, + "adalora_deltaT": 1, + "adalora_beta1": 0.85, + "adalora_beta2": 0.85, + "adalora_orth_reg_weight": 0.5, + "llamapro_num_new_blocks": 4, + "llamapro_num_groups": null, + "lisa_activated_layers": 0, + "lisa_step_interval": 20, + "reft_layer_key": null, + "reft_layers": null, + "reft_rank": 4, + "reft_intervention_type": "LoreftIntervention", + "reft_args": null, + "use_liger": false, + "model_layer_cls_name": null, + "metric_warmup_step": 0, + "fsdp_num": 1, + "acc_steps": 1, + "add_version": true, + "resume_only_model": false, + "check_model": true, + "create_checkpoint_symlink": false, + "packing": false, + "lazy_tokenize": false, + "loss_type": "sigmoid", + "optimizer": null, + "metric": null, + "acc_strategy": "token", + "reward_model": null, + "reward_adapters": [], + "reward_model_type": null, + "reward_model_revision": null, + "num_ppo_epochs": 4, + "whiten_rewards": false, + "kl_coef": 0.05, + "cliprange": 0.2, + "vf_coef": 0.1, + "cliprange_value": 0.2, + "gamma": 1.0, + "lam": 0.95, + "num_mini_batches": 1, + "local_rollout_forward_batch_size": 64, + "num_sample_generations": 10, + "response_length": 512, + "missing_eos_penalty": null, + "rlhf_type": "dpo", + "ref_model": null, + "ref_model_type": null, + "ref_model_revision": null, + "beta": 0.1, + "label_smoothing": 0, + "rpo_alpha": 1.0, + "cpo_alpha": 1.0, + "simpo_gamma": 1, + "desirable_weight": 1.0, + "undesirable_weight": 1.0, + "rank": 0, + 
"global_world_size": 8, + "local_world_size": 8, + "model_suffix": "deepseek-r1-7b", + "model_info": "ModelInfo(model_type='deepseek_r1_distill', model_dir='/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, config=None, task_type='causal_lm', num_labels=None)", + "model_meta": "ModelMeta(model_type='deepseek_r1_distill', model_groups=[ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=['transformers>=4.37'], tags=[]), ModelGroup(models=[Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', hf_model_id='deepseek-ai/DeepSeek-R1-Distill-Llama-70B', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='deepseek_r1', get_function=, model_arch='llama', architectures=['Qwen2ForCausalLM', 'LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=[], tags=[])", + "model_dir": "/m2v_intern/wangruotong/logs/Models/deepseek-r1-7b", + "hub": "", + "training_args": 
"DPOConfig(output_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=10, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=4, dataloader_prefetch_factor=None, past_index=-1, run_name='/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, 
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, 
push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=4, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=4096, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)" +} \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f81af063aa4adf30c0c01ed2cc686d43bdc23205 --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6adc942267fa879d8ca974c950c07d0dd21878238fecd6d6893117cf8b109fad +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..36dc62225628f116a254db526bd2b691f516baf0 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0d7df03fc109a4f95daca9902fc4be16a549749454ecca3282514a74aef52f +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba07cb5d4d569a7bde90408ee87dc91700c8746e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950d404de994419c24ad79b043cf3b26feb59d01ce4c7124f2546def69a507d5 +size 30281648 diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..14fe4431059f8f7fcff03859545714234d14948d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a927c706f360571ff0ee4b64577279af4a1473f04be2fbeda21ff51dcc265a4 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e53a8bf84cb57a76589cce896ef64cf8b9c8b72 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a8dfc7dd00c6c7937f7ce0d9a3f46c5eb624d6ccb8a418a73d559abc623c324 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..54c0573a51e89d1095586a494efa5a68755d1c3a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a39b63ca839de763f3549f3736d67b539b6403a3e944ab86538710bbca52414 +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eefdb1d9aca62195eacd5ab4662f8ddd6beebe5d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd004e4846a8e0a714fbad0511004fcbdde30177c03530877666cda8fbcc83f +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9dc65ce8fc8947eefe82531bef447ea5d22029e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c651a75b97f2f16b421fe9bcbf23b196b63416b29ecdee3348e8edc9220b10aa +size 30281648 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2ddf263d0bbfafc59cfd61bc3a0c4d239b5599e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e85838794056895d21f30752b5f4998b07f7ed00d8fb717bbb2bd83c11ce479 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c55b12bc56cc7a0f76877a92b327bd707199455 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_1_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22daf696d9a2bc990c029c8b166df96a9c46700fe24aa4e62c183095432e08e +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..9ad5e7e41edc2354c2a332bba31cf725d2b57c1a --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_2_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2474de799cac2fe35d742799fa42f663d49ed74097fe7f8fa29a7b04e9aa2d82 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bce9c762fdcb2d85827dfaf7689155e50056c07d --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_3_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b7b3f07542ff494e76a2ed0743eca31ea67fcc72a502ef0a3e6a02a0a5b69e +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..359c03666c86b7ac4073c12e3901619ecae7edc5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_4_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064261d9d5cfc1f02c65e65192c1b359d04adccfe92bc07a09a8460983535e24 +size 
388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a192d3d1e09d7b51b74e0a14059519773b20b01 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_5_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa44ff7d2c4aad6edaad1adc7dbb405171a6faf130109e604add3d527d8bc23 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3782be710ed25614631ae0091cf94385eeed343 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_6_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d907e70fe8ed2713e82c93280f9643133b7718a91fcbc78ba939e0843d794f3 +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4a024f742a8855695732ecd1b220532c301e17af --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/global_step90/zero_pp_rank_7_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9264c6de9771df782e735e6b99c8fc50144392b9221b167a2eccb20cd5534b +size 388374 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/latest b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/latest new file mode 100644 index 0000000000000000000000000000000000000000..8e7a337e2cb23bf07023d223dd647df2d25f0fc1 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/latest @@ -0,0 +1 @@ +global_step90 \ No newline at end of file diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_0.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e31a2394e12bf431ae13288c3d90fe4727f07fa7 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_1.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1db0a0f44aa3ac1d82c3bf8dc2d8968eeba4ce7 --- 
/dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_2.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..75de18f57a056bd6a5f89df1abd045678f3f919e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_3.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd100693bc9f3267d044ce4a16e702502dc03ec --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_4.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5aeeabfe119f1cb0c8c804f1b9a4d3049f478d69 --- /dev/null 
+++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_5.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..91fe0f42382ab06f4d26d753745a914c9e46100e --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_6.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5830ca6bd04645962b6e56a00a91cd8349ca449c --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73488bec91f9dee6d8105d06f99edaf4d27b6b064250d4c7023f33285b2f3132 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_7.pth b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..343d1c0475f0dc64100dc67b09195e047f1a7bcf --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf6ee1cc2e1325b428a21172ec4e61b7220c5489751ea11c06bb66c77a0cd08 +size 15984 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/scheduler.pt b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a39c9cfeaa2d69cb5a66e83272eee65ddffaed5 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b768777109679597db5d1fa24a743962bede33623e22702b13b95eab2d42cb8 +size 1064 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/trainer_state.json b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..17413459bf6f88c8f22db7e21bca4feccd0c36a2 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/trainer_state.json @@ -0,0 +1,528 @@ +{ + "best_metric": 0.40795898, + "best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90", + "epoch": 2.3684210526315788, + "eval_steps": 10, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02631578947368421, + "grad_norm": 10.70593006577457, + "learning_rate": 1.6666666666666667e-05, + "logits/chosen": 0.828125, 
+ "logits/rejected": -0.283203125, + "logps/chosen": -444.0, + "logps/rejected": -360.0, + "loss": 1.12939453125, + "memory(GiB)": 6.7, + "nll_loss": 0.439453125, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1, + "train_speed(iter/s)": 0.113961 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 14.176485007231813, + "learning_rate": 8.333333333333334e-05, + "logits/chosen": -0.1298828125, + "logits/rejected": 0.03759765625, + "logps/chosen": -377.25, + "logps/rejected": -512.0, + "loss": 1.8751220703125, + "memory(GiB)": 16.45, + "nll_loss": 1.1748046875, + "rewards/accuracies": 0.1875, + "rewards/chosen": 0.01715087890625, + "rewards/margins": 0.0078125, + "rewards/rejected": 0.009368896484375, + "step": 5, + "train_speed(iter/s)": 0.234379 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 12.015147423707822, + "learning_rate": 9.966191788709716e-05, + "logits/chosen": 0.07973632961511612, + "logits/rejected": 0.19414062798023224, + "logps/chosen": -462.79998779296875, + "logps/rejected": -507.20001220703125, + "loss": 2.12158203125, + "memory(GiB)": 40.52, + "nll_loss": 1.553125023841858, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.702929675579071, + "rewards/margins": 0.4051757752895355, + "rewards/rejected": 0.2975097596645355, + "step": 10, + "train_speed(iter/s)": 0.254866 + }, + { + "epoch": 0.2631578947368421, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.03125, + "eval_logps/chosen": -360.0, + "eval_logps/rejected": -496.0, + "eval_loss": 1.19140625, + "eval_nll_loss": 0.796875, + "eval_rewards/accuracies": 0.75, + "eval_rewards/chosen": 2.40625, + "eval_rewards/margins": 1.4765625, + "eval_rewards/rejected": 0.92578125, + "eval_runtime": 1.3322, + "eval_samples_per_second": 3.002, + "eval_steps_per_second": 0.751, + "step": 10 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 9.41426350911575, + "learning_rate": 9.829629131445342e-05, + 
"logits/chosen": -0.29057615995407104, + "logits/rejected": 0.11259765923023224, + "logps/chosen": -353.6000061035156, + "logps/rejected": -487.6000061035156, + "loss": 1.001171875, + "memory(GiB)": 40.52, + "nll_loss": 0.731249988079071, + "rewards/accuracies": 0.875, + "rewards/chosen": 3.364062547683716, + "rewards/margins": 2.234375, + "rewards/rejected": 1.1257812976837158, + "step": 15, + "train_speed(iter/s)": 0.266928 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 3.417766982037976, + "learning_rate": 9.591080534401371e-05, + "logits/chosen": -0.8324218988418579, + "logits/rejected": 0.4007812440395355, + "logps/chosen": -258.20001220703125, + "logps/rejected": -377.6000061035156, + "loss": 1.1167236328125, + "memory(GiB)": 40.52, + "nll_loss": 0.922656238079071, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 6.8125, + "rewards/margins": 3.262500047683716, + "rewards/rejected": 3.543750047683716, + "step": 20, + "train_speed(iter/s)": 0.275171 + }, + { + "epoch": 0.5263157894736842, + "eval_logits/chosen": -2.5, + "eval_logits/rejected": 1.0859375, + "eval_logps/chosen": -298.0, + "eval_logps/rejected": -458.0, + "eval_loss": 0.666015625, + "eval_nll_loss": 0.5625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 8.6875, + "eval_rewards/margins": 3.9375, + "eval_rewards/rejected": 4.78125, + "eval_runtime": 1.3177, + "eval_samples_per_second": 3.036, + "eval_steps_per_second": 0.759, + "step": 20 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 2.721982515360601, + "learning_rate": 9.255583362184999e-05, + "logits/chosen": -0.3814453184604645, + "logits/rejected": 0.09628906100988388, + "logps/chosen": -373.6000061035156, + "logps/rejected": -422.20001220703125, + "loss": 0.673486328125, + "memory(GiB)": 40.52, + "nll_loss": 0.600781261920929, + "rewards/accuracies": 1.0, + "rewards/chosen": 9.074999809265137, + "rewards/margins": 4.393750190734863, + "rewards/rejected": 4.684374809265137, + "step": 25, + 
"train_speed(iter/s)": 0.274036 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 0.9866084312671491, + "learning_rate": 8.83022221559489e-05, + "logits/chosen": -0.5782226324081421, + "logits/rejected": 0.4605468809604645, + "logps/chosen": -250.6999969482422, + "logps/rejected": -412.79998779296875, + "loss": 0.50615234375, + "memory(GiB)": 40.52, + "nll_loss": 0.48750001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 10.949999809265137, + "rewards/margins": 6.068749904632568, + "rewards/rejected": 4.884375095367432, + "step": 30, + "train_speed(iter/s)": 0.277362 + }, + { + "epoch": 0.7894736842105263, + "eval_logits/chosen": -2.078125, + "eval_logits/rejected": 1.203125, + "eval_logps/chosen": -272.0, + "eval_logps/rejected": -468.0, + "eval_loss": 0.49609375, + "eval_nll_loss": 0.49609375, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 11.3125, + "eval_rewards/margins": 7.5, + "eval_rewards/rejected": 3.78125, + "eval_runtime": 1.3623, + "eval_samples_per_second": 2.936, + "eval_steps_per_second": 0.734, + "step": 30 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 1.2141834186198552, + "learning_rate": 8.323979328069689e-05, + "logits/chosen": -0.512499988079071, + "logits/rejected": 0.31171876192092896, + "logps/chosen": -379.20001220703125, + "logps/rejected": -446.0, + "loss": 0.4916259765625, + "memory(GiB)": 40.52, + "nll_loss": 0.4892578125, + "rewards/accuracies": 1.0, + "rewards/chosen": 12.012499809265137, + "rewards/margins": 8.487500190734863, + "rewards/rejected": 3.543750047683716, + "step": 35, + "train_speed(iter/s)": 0.275044 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 1.0755142345948945, + "learning_rate": 7.74754489035403e-05, + "logits/chosen": -0.24981689453125, + "logits/rejected": 0.541796863079071, + "logps/chosen": -271.3999938964844, + "logps/rejected": -482.3999938964844, + "loss": 0.45865478515625, + "memory(GiB)": 40.52, + "nll_loss": 0.563281238079071, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 11.9375, + "rewards/margins": 9.762499809265137, + "rewards/rejected": 2.1859374046325684, + "step": 40, + "train_speed(iter/s)": 0.278234 + }, + { + "epoch": 1.0526315789473684, + "eval_logits/chosen": -1.7734375, + "eval_logits/rejected": 1.65625, + "eval_logps/chosen": -253.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.4462890625, + "eval_nll_loss": 0.4453125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 13.125, + "eval_rewards/margins": 12.125, + "eval_rewards/rejected": 1.0, + "eval_runtime": 1.291, + "eval_samples_per_second": 3.098, + "eval_steps_per_second": 0.775, + "step": 40 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 0.6599652846962414, + "learning_rate": 7.113091308703498e-05, + "logits/chosen": -0.24863281846046448, + "logits/rejected": 0.8564453125, + "logps/chosen": -306.20001220703125, + "logps/rejected": -480.0, + "loss": 0.44638671875, + "memory(GiB)": 40.52, + "nll_loss": 0.44648438692092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.050000190734863, + "rewards/margins": 12.537500381469727, + "rewards/rejected": 1.506250023841858, + "step": 45, + "train_speed(iter/s)": 0.276176 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 0.5744853865576506, + "learning_rate": 6.434016163555452e-05, + "logits/chosen": 0.05844726413488388, + "logits/rejected": 1.05859375, + "logps/chosen": -300.20001220703125, + "logps/rejected": -461.6000061035156, + "loss": 0.434783935546875, + "memory(GiB)": 40.52, + "nll_loss": 0.4339843690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.8125, + "rewards/margins": 12.762499809265137, + "rewards/rejected": 2.0621094703674316, + "step": 50, + "train_speed(iter/s)": 0.279108 + }, + { + "epoch": 1.3157894736842106, + "eval_logits/chosen": -1.5546875, + "eval_logits/rejected": 1.9296875, + "eval_logps/chosen": -242.0, + "eval_logps/rejected": -496.0, + "eval_loss": 0.425537109375, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + 
"eval_rewards/chosen": 14.25, + "eval_rewards/margins": 13.25, + "eval_rewards/rejected": 0.9765625, + "eval_runtime": 1.3278, + "eval_samples_per_second": 3.013, + "eval_steps_per_second": 0.753, + "step": 50 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 0.2926328226891597, + "learning_rate": 5.724659296536233e-05, + "logits/chosen": -0.1898193359375, + "logits/rejected": 1.259374976158142, + "logps/chosen": -261.20001220703125, + "logps/rejected": -431.20001220703125, + "loss": 0.40357666015625, + "memory(GiB)": 40.52, + "nll_loss": 0.40234375, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.324999809265137, + "rewards/margins": 12.612500190734863, + "rewards/rejected": 1.704687476158142, + "step": 55, + "train_speed(iter/s)": 0.280714 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 0.24140281258606347, + "learning_rate": 5e-05, + "logits/chosen": -0.07255859673023224, + "logits/rejected": 0.758007824420929, + "logps/chosen": -263.3999938964844, + "logps/rejected": -405.20001220703125, + "loss": 0.28531494140625, + "memory(GiB)": 40.52, + "nll_loss": 0.28496092557907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.337499618530273, + "rewards/margins": 12.774999618530273, + "rewards/rejected": 2.5546875, + "step": 60, + "train_speed(iter/s)": 0.283696 + }, + { + "epoch": 1.5789473684210527, + "eval_logits/chosen": -1.40625, + "eval_logits/rejected": 1.96875, + "eval_logps/chosen": -241.0, + "eval_logps/rejected": -498.0, + "eval_loss": 0.425048828125, + "eval_nll_loss": 0.42578125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.375, + "eval_rewards/margins": 13.5625, + "eval_rewards/rejected": 0.80078125, + "eval_runtime": 1.3237, + "eval_samples_per_second": 3.022, + "eval_steps_per_second": 0.755, + "step": 60 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 0.6094457836784298, + "learning_rate": 4.275340703463767e-05, + "logits/chosen": -0.5101562738418579, + "logits/rejected": 1.0482909679412842, + 
"logps/chosen": -261.0, + "logps/rejected": -409.6000061035156, + "loss": 0.3310546875, + "memory(GiB)": 40.52, + "nll_loss": 0.33125001192092896, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.774999618530273, + "rewards/margins": 14.050000190734863, + "rewards/rejected": 1.736328125, + "step": 65, + "train_speed(iter/s)": 0.283196 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 0.45260618201827363, + "learning_rate": 3.5659838364445505e-05, + "logits/chosen": 0.03581542894244194, + "logits/rejected": 1.0457031726837158, + "logps/chosen": -193.0, + "logps/rejected": -501.6000061035156, + "loss": 0.32975921630859373, + "memory(GiB)": 40.52, + "nll_loss": 0.3296875059604645, + "rewards/accuracies": 1.0, + "rewards/chosen": 14.487500190734863, + "rewards/margins": 12.287500381469727, + "rewards/rejected": 2.176562547683716, + "step": 70, + "train_speed(iter/s)": 0.283818 + }, + { + "epoch": 1.8421052631578947, + "eval_logits/chosen": -1.3203125, + "eval_logits/rejected": 1.9765625, + "eval_logps/chosen": -237.0, + "eval_logps/rejected": -500.0, + "eval_loss": 0.41455078125, + "eval_nll_loss": 0.4140625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.75, + "eval_rewards/margins": 14.0625, + "eval_rewards/rejected": 0.6796875, + "eval_runtime": 1.3685, + "eval_samples_per_second": 2.923, + "eval_steps_per_second": 0.731, + "step": 70 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 0.7812661467833826, + "learning_rate": 2.886908691296504e-05, + "logits/chosen": -0.42255860567092896, + "logits/rejected": 1.28125, + "logps/chosen": -298.0, + "logps/rejected": -456.79998779296875, + "loss": 0.40250244140625, + "memory(GiB)": 40.52, + "nll_loss": 0.4027343690395355, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.800000190734863, + "rewards/margins": 15.125, + "rewards/rejected": 0.692187488079071, + "step": 75, + "train_speed(iter/s)": 0.283316 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 0.5488588034556375, + "learning_rate": 
2.25245510964597e-05, + "logits/chosen": -0.18242187798023224, + "logits/rejected": 1.212890625, + "logps/chosen": -239.39999389648438, + "logps/rejected": -437.20001220703125, + "loss": 0.3640625, + "memory(GiB)": 40.52, + "nll_loss": 0.38066405057907104, + "rewards/accuracies": 1.0, + "rewards/chosen": 15.3125, + "rewards/margins": 13.925000190734863, + "rewards/rejected": 1.3738281726837158, + "step": 80, + "train_speed(iter/s)": 0.282344 + }, + { + "epoch": 2.1052631578947367, + "eval_logits/chosen": -1.3046875, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -235.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.410400390625, + "eval_nll_loss": 0.41015625, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 14.9375, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.375, + "eval_runtime": 1.3645, + "eval_samples_per_second": 2.931, + "eval_steps_per_second": 0.733, + "step": 80 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 0.5977630685878709, + "learning_rate": 1.6760206719303105e-05, + "logits/chosen": -0.2928222715854645, + "logits/rejected": 1.396875023841858, + "logps/chosen": -250.60000610351562, + "logps/rejected": -473.6000061035156, + "loss": 0.339306640625, + "memory(GiB)": 40.52, + "nll_loss": 0.3388671875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.162500381469727, + "rewards/margins": 15.362500190734863, + "rewards/rejected": 0.8084961175918579, + "step": 85, + "train_speed(iter/s)": 0.282252 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 0.2955118166701661, + "learning_rate": 1.1697777844051105e-05, + "logits/chosen": -0.35551756620407104, + "logits/rejected": 0.749218761920929, + "logps/chosen": -254.39999389648438, + "logps/rejected": -422.0, + "loss": 0.3234832763671875, + "memory(GiB)": 40.52, + "nll_loss": 0.3232421875, + "rewards/accuracies": 1.0, + "rewards/chosen": 16.237499237060547, + "rewards/margins": 14.3125, + "rewards/rejected": 1.9640624523162842, + "step": 90, + 
"train_speed(iter/s)": 0.285022 + }, + { + "epoch": 2.3684210526315788, + "eval_logits/chosen": -1.265625, + "eval_logits/rejected": 2.0, + "eval_logps/chosen": -234.0, + "eval_logps/rejected": -502.0, + "eval_loss": 0.407958984375, + "eval_nll_loss": 0.408203125, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": 15.0625, + "eval_rewards/margins": 14.5625, + "eval_rewards/rejected": 0.5, + "eval_runtime": 1.3391, + "eval_samples_per_second": 2.987, + "eval_steps_per_second": 0.747, + "step": 90 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 114464163627008.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/training_args.bin b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e926cd924b33a09827843ef082da1793fb49fece --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512ce35c3cb0d098001365fc17b658c77df2485cba0600e08e62a9d5e66bbbec +size 9016 diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/zero_to_fp32.py b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/zero_to_fp32.py new file mode 100755 index 
0000000000000000000000000000000000000000..0e759146cadd92ddfefab3680146c2bd6a2b5c04 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-90/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..397bb4af23d86228f502c7c41a3e876b7854c0b2 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..8d9342a6c6e59aa70a9a01e0baa6fc4ad68df251 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..ddb7263d0b26d2d077013a584b9f511d26ea7fae Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..62f992ed219c5d8ca38243400fb01f4ba0b6c2cf Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1725ee17cc2b6e71e810ddfd5bf875a2fd19cecb Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..0454e13c2533b36422fa58511bc59e9fa4200c67 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_accuracies.png new file mode 100644 index 
0000000000000000000000000000000000000000..7cff44a67bd760c5d9b6b42ce804ca85a17528fe Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..d74e211e77db48efa4f9758cf00f53e094e6da21 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_margins.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..c6dd4c238c9ce5ea7ee64ed2e54b6ee82a1d0b20 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..a05eb3bfa784a30b1635cbe77779e2d700561125 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_runtime.png 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..3b2fdefc00ccc16246836d35538da7416fe1a090 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..6f89506542b5bd5cff7dcca1e96e4f5358e65bcf Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_steps_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..62b134202ae027459a7460c60871dfdaaf661d34 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/eval_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_epoch.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_epoch.png new file mode 100644 index 0000000000000000000000000000000000000000..c152c9bbb1100d680125e899d3d0da3db98dc221 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_epoch.png differ diff --git 
a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_grad_norm.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_grad_norm.png new file mode 100644 index 0000000000000000000000000000000000000000..ceebeb7d698f5ef455410c0f1edd2d8905191c84 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_grad_norm.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_learning_rate.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_learning_rate.png new file mode 100644 index 0000000000000000000000000000000000000000..e35e42c173f451f9666ba448872c615528441c9e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_learning_rate.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logits_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logits_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..863bb6baab60be9e840280dda25eb2835e5dc8af Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logits_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logits_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logits_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..8079c77da4f2b9ebea7cd3834c860b40bde684ee Binary files /dev/null and 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logits_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logps_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logps_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..8f8aa91d73b6625dd0960da05f9d7c13a902ff99 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logps_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logps_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logps_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..be708ab7905aa10c8e0b369842f027bdac229922 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_logps_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..7e45f0105a71a63c85f930df63f05e524a87f1a7 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_memory(GiB).png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_memory(GiB).png new file mode 100644 index 
0000000000000000000000000000000000000000..9d9c098a0847c34f42f6a52c4e41cfd9d50b0cc7 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_memory(GiB).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_nll_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_nll_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..7b79bc2016c8598d8fd34733a3524e30ca53b3d1 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_nll_loss.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_accuracies.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_accuracies.png new file mode 100644 index 0000000000000000000000000000000000000000..94ed77cf4aae618ac3ed467bfd76413b51c879e0 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_accuracies.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_chosen.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_chosen.png new file mode 100644 index 0000000000000000000000000000000000000000..1b9394a46d0031efefd68e370fb5953e003f696e Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_chosen.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_margins.png 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_margins.png new file mode 100644 index 0000000000000000000000000000000000000000..621a626b308b0b9c323d24c5f0584f61e3ceeca4 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_margins.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_rejected.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_rejected.png new file mode 100644 index 0000000000000000000000000000000000000000..f79679cd455d91d293aebe7e92db67c5aa31162d Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_rewards_rejected.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_total_flos.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_total_flos.png new file mode 100644 index 0000000000000000000000000000000000000000..01679f71d7240e2dfff26c8487d16cab218ebda9 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_total_flos.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_loss.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..e7cfc4051fbc65950014aff1717b72b7d17c707b Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_loss.png differ diff 
--git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_runtime.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_runtime.png new file mode 100644 index 0000000000000000000000000000000000000000..7723011d22343e232c8efef2978d1de0770df22c Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_runtime.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_samples_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_samples_per_second.png new file mode 100644 index 0000000000000000000000000000000000000000..ff0f412aaa896533b718cea4368cdeb1c4f4f724 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_samples_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_speed(iter_s).png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_speed(iter_s).png new file mode 100644 index 0000000000000000000000000000000000000000..b63379925ff936172f2bddebbc0526eb255e5cad Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_speed(iter_s).png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_steps_per_second.png b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_steps_per_second.png new file mode 100644 index 
0000000000000000000000000000000000000000..a2afc087f6e9edf78abb66236e8d5cb46e407571 Binary files /dev/null and b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/images/train_train_steps_per_second.png differ diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/logging.jsonl b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/logging.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c409d5cd983a96a831b0c12d680ff1a13cc6a099 --- /dev/null +++ b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/logging.jsonl @@ -0,0 +1,37 @@ +{"loss": 1.12939453, "grad_norm": 10.70593007, "learning_rate": 1.667e-05, "memory(GiB)": 6.7, "train_speed(iter/s)": 0.113961, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -444.0, "logps/rejected": -360.0, "logits/chosen": 0.828125, "logits/rejected": -0.28320312, "nll_loss": 0.43945312, "epoch": 0.02631579, "global_step/max_steps": "1/114", "percentage": "0.88%", "elapsed_time": "5s", "remaining_time": "9m 35s"} +{"loss": 1.87512207, "grad_norm": 14.17648501, "learning_rate": 8.333e-05, "memory(GiB)": 16.45, "train_speed(iter/s)": 0.234379, "rewards/chosen": 0.01715088, "rewards/rejected": 0.0093689, "rewards/accuracies": 0.1875, "rewards/margins": 0.0078125, "logps/chosen": -377.25, "logps/rejected": -512.0, "logits/chosen": -0.12988281, "logits/rejected": 0.03759766, "nll_loss": 1.17480469, "epoch": 0.13157895, "global_step/max_steps": "5/114", "percentage": "4.39%", "elapsed_time": "17s", "remaining_time": "6m 24s"} +{"loss": 2.12158203, "grad_norm": 12.01514742, "learning_rate": 9.966e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.254866, "rewards/chosen": 0.70292968, "rewards/rejected": 0.29750976, "rewards/accuracies": 0.67500001, "rewards/margins": 0.40517578, 
"logps/chosen": -462.79998779, "logps/rejected": -507.20001221, "logits/chosen": 0.07973633, "logits/rejected": 0.19414063, "nll_loss": 1.55312502, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "35s", "remaining_time": "6m 9s"} +{"eval_loss": 1.19140625, "eval_runtime": 1.3322, "eval_samples_per_second": 3.002, "eval_steps_per_second": 0.751, "eval_rewards/chosen": 2.40625, "eval_rewards/rejected": 0.92578125, "eval_rewards/accuracies": 0.75, "eval_rewards/margins": 1.4765625, "eval_logps/chosen": -360.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -2.5, "eval_logits/rejected": 1.03125, "eval_nll_loss": 0.796875, "epoch": 0.26315789, "global_step/max_steps": "10/114", "percentage": "8.77%", "elapsed_time": "36s", "remaining_time": "6m 23s"} +{"loss": 1.00117188, "grad_norm": 9.41426351, "learning_rate": 9.83e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.266928, "rewards/chosen": 3.36406255, "rewards/rejected": 1.1257813, "rewards/accuracies": 0.875, "rewards/margins": 2.234375, "logps/chosen": -353.6000061, "logps/rejected": -487.6000061, "logits/chosen": -0.29057616, "logits/rejected": 0.11259766, "nll_loss": 0.73124999, "epoch": 0.39473684, "global_step/max_steps": "15/114", "percentage": "13.16%", "elapsed_time": "52s", "remaining_time": "5m 46s"} +{"loss": 1.11672363, "grad_norm": 3.41776698, "learning_rate": 9.591e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.275171, "rewards/chosen": 6.8125, "rewards/rejected": 3.54375005, "rewards/accuracies": 0.92500001, "rewards/margins": 3.26250005, "logps/chosen": -258.20001221, "logps/rejected": -377.6000061, "logits/chosen": -0.8324219, "logits/rejected": 0.40078124, "nll_loss": 0.92265624, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "1m 8s", "remaining_time": "5m 24s"} +{"eval_loss": 0.66601562, "eval_runtime": 1.3177, "eval_samples_per_second": 3.036, "eval_steps_per_second": 0.759, 
"eval_rewards/chosen": 8.6875, "eval_rewards/rejected": 4.78125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 3.9375, "eval_logps/chosen": -298.0, "eval_logps/rejected": -458.0, "eval_logits/chosen": -2.5, "eval_logits/rejected": 1.0859375, "eval_nll_loss": 0.5625, "epoch": 0.52631579, "global_step/max_steps": "20/114", "percentage": "17.54%", "elapsed_time": "1m 10s", "remaining_time": "5m 30s"} +{"loss": 0.67348633, "grad_norm": 2.72198252, "learning_rate": 9.256e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.274036, "rewards/chosen": 9.07499981, "rewards/rejected": 4.68437481, "rewards/accuracies": 1.0, "rewards/margins": 4.39375019, "logps/chosen": -373.6000061, "logps/rejected": -422.20001221, "logits/chosen": -0.38144532, "logits/rejected": 0.09628906, "nll_loss": 0.60078126, "epoch": 0.65789474, "global_step/max_steps": "25/114", "percentage": "21.93%", "elapsed_time": "1m 27s", "remaining_time": "5m 11s"} +{"loss": 0.50615234, "grad_norm": 0.98660843, "learning_rate": 8.83e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.277362, "rewards/chosen": 10.94999981, "rewards/rejected": 4.8843751, "rewards/accuracies": 1.0, "rewards/margins": 6.0687499, "logps/chosen": -250.69999695, "logps/rejected": -412.79998779, "logits/chosen": -0.57822263, "logits/rejected": 0.46054688, "nll_loss": 0.48750001, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "1m 44s", "remaining_time": "4m 52s"} +{"eval_loss": 0.49609375, "eval_runtime": 1.3623, "eval_samples_per_second": 2.936, "eval_steps_per_second": 0.734, "eval_rewards/chosen": 11.3125, "eval_rewards/rejected": 3.78125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 7.5, "eval_logps/chosen": -272.0, "eval_logps/rejected": -468.0, "eval_logits/chosen": -2.078125, "eval_logits/rejected": 1.203125, "eval_nll_loss": 0.49609375, "epoch": 0.78947368, "global_step/max_steps": "30/114", "percentage": "26.32%", "elapsed_time": "1m 45s", "remaining_time": 
"4m 56s"} +{"loss": 0.49162598, "grad_norm": 1.21418342, "learning_rate": 8.324e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.275044, "rewards/chosen": 12.01249981, "rewards/rejected": 3.54375005, "rewards/accuracies": 1.0, "rewards/margins": 8.48750019, "logps/chosen": -379.20001221, "logps/rejected": -446.0, "logits/chosen": -0.51249999, "logits/rejected": 0.31171876, "nll_loss": 0.48925781, "epoch": 0.92105263, "global_step/max_steps": "35/114", "percentage": "30.70%", "elapsed_time": "2m 3s", "remaining_time": "4m 38s"} +{"loss": 0.45865479, "grad_norm": 1.07551423, "learning_rate": 7.748e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.278234, "rewards/chosen": 11.9375, "rewards/rejected": 2.1859374, "rewards/accuracies": 1.0, "rewards/margins": 9.76249981, "logps/chosen": -271.3999939, "logps/rejected": -482.3999939, "logits/chosen": -0.24981689, "logits/rejected": 0.54179686, "nll_loss": 0.56328124, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "2m 20s", "remaining_time": "4m 19s"} +{"eval_loss": 0.44628906, "eval_runtime": 1.291, "eval_samples_per_second": 3.098, "eval_steps_per_second": 0.775, "eval_rewards/chosen": 13.125, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 12.125, "eval_logps/chosen": -253.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.7734375, "eval_logits/rejected": 1.65625, "eval_nll_loss": 0.4453125, "epoch": 1.05263158, "global_step/max_steps": "40/114", "percentage": "35.09%", "elapsed_time": "2m 21s", "remaining_time": "4m 21s"} +{"loss": 0.44638672, "grad_norm": 0.65996528, "learning_rate": 7.113e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.276176, "rewards/chosen": 14.05000019, "rewards/rejected": 1.50625002, "rewards/accuracies": 1.0, "rewards/margins": 12.53750038, "logps/chosen": -306.20001221, "logps/rejected": -480.0, "logits/chosen": -0.24863282, "logits/rejected": 0.85644531, "nll_loss": 0.44648439, "epoch": 
1.18421053, "global_step/max_steps": "45/114", "percentage": "39.47%", "elapsed_time": "2m 39s", "remaining_time": "4m 4s"} +{"loss": 0.43478394, "grad_norm": 0.57448539, "learning_rate": 6.434e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.279108, "rewards/chosen": 14.8125, "rewards/rejected": 2.06210947, "rewards/accuracies": 1.0, "rewards/margins": 12.76249981, "logps/chosen": -300.20001221, "logps/rejected": -461.6000061, "logits/chosen": 0.05844726, "logits/rejected": 1.05859375, "nll_loss": 0.43398437, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "2m 55s", "remaining_time": "3m 44s"} +{"eval_loss": 0.42553711, "eval_runtime": 1.3278, "eval_samples_per_second": 3.013, "eval_steps_per_second": 0.753, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 0.9765625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.25, "eval_logps/chosen": -242.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.5546875, "eval_logits/rejected": 1.9296875, "eval_nll_loss": 0.42578125, "epoch": 1.31578947, "global_step/max_steps": "50/114", "percentage": "43.86%", "elapsed_time": "2m 56s", "remaining_time": "3m 46s"} +{"loss": 0.40357666, "grad_norm": 0.29263282, "learning_rate": 5.725e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.280714, "rewards/chosen": 14.32499981, "rewards/rejected": 1.70468748, "rewards/accuracies": 1.0, "rewards/margins": 12.61250019, "logps/chosen": -261.20001221, "logps/rejected": -431.20001221, "logits/chosen": -0.18981934, "logits/rejected": 1.25937498, "nll_loss": 0.40234375, "epoch": 1.44736842, "global_step/max_steps": "55/114", "percentage": "48.25%", "elapsed_time": "3m 12s", "remaining_time": "3m 26s"} +{"loss": 0.28531494, "grad_norm": 0.24140281, "learning_rate": 5e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283696, "rewards/chosen": 15.33749962, "rewards/rejected": 2.5546875, "rewards/accuracies": 1.0, "rewards/margins": 12.77499962, "logps/chosen": 
-263.3999939, "logps/rejected": -405.20001221, "logits/chosen": -0.0725586, "logits/rejected": 0.75800782, "nll_loss": 0.28496093, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "3m 27s", "remaining_time": "3m 7s"} +{"eval_loss": 0.42504883, "eval_runtime": 1.3237, "eval_samples_per_second": 3.022, "eval_steps_per_second": 0.755, "eval_rewards/chosen": 14.375, "eval_rewards/rejected": 0.80078125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.5625, "eval_logps/chosen": -241.0, "eval_logps/rejected": -498.0, "eval_logits/chosen": -1.40625, "eval_logits/rejected": 1.96875, "eval_nll_loss": 0.42578125, "epoch": 1.57894737, "global_step/max_steps": "60/114", "percentage": "52.63%", "elapsed_time": "3m 29s", "remaining_time": "3m 8s"} +{"loss": 0.33105469, "grad_norm": 0.60944578, "learning_rate": 4.275e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283196, "rewards/chosen": 15.77499962, "rewards/rejected": 1.73632812, "rewards/accuracies": 1.0, "rewards/margins": 14.05000019, "logps/chosen": -261.0, "logps/rejected": -409.6000061, "logits/chosen": -0.51015627, "logits/rejected": 1.04829097, "nll_loss": 0.33125001, "epoch": 1.71052632, "global_step/max_steps": "65/114", "percentage": "57.02%", "elapsed_time": "3m 45s", "remaining_time": "2m 50s"} +{"loss": 0.32975922, "grad_norm": 0.45260618, "learning_rate": 3.566e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283818, "rewards/chosen": 14.48750019, "rewards/rejected": 2.17656255, "rewards/accuracies": 1.0, "rewards/margins": 12.28750038, "logps/chosen": -193.0, "logps/rejected": -501.6000061, "logits/chosen": 0.03581543, "logits/rejected": 1.04570317, "nll_loss": 0.32968751, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "4m 2s", "remaining_time": "2m 32s"} +{"eval_loss": 0.41455078, "eval_runtime": 1.3685, "eval_samples_per_second": 2.923, "eval_steps_per_second": 0.731, "eval_rewards/chosen": 14.75, 
"eval_rewards/rejected": 0.6796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.0625, "eval_logps/chosen": -237.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.3203125, "eval_logits/rejected": 1.9765625, "eval_nll_loss": 0.4140625, "epoch": 1.84210526, "global_step/max_steps": "70/114", "percentage": "61.40%", "elapsed_time": "4m 4s", "remaining_time": "2m 33s"} +{"loss": 0.40250244, "grad_norm": 0.78126615, "learning_rate": 2.887e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283316, "rewards/chosen": 15.80000019, "rewards/rejected": 0.69218749, "rewards/accuracies": 1.0, "rewards/margins": 15.125, "logps/chosen": -298.0, "logps/rejected": -456.79998779, "logits/chosen": -0.42255861, "logits/rejected": 1.28125, "nll_loss": 0.40273437, "epoch": 1.97368421, "global_step/max_steps": "75/114", "percentage": "65.79%", "elapsed_time": "4m 21s", "remaining_time": "2m 15s"} +{"loss": 0.3640625, "grad_norm": 0.5488588, "learning_rate": 2.252e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.282344, "rewards/chosen": 15.3125, "rewards/rejected": 1.37382817, "rewards/accuracies": 1.0, "rewards/margins": 13.92500019, "logps/chosen": -239.3999939, "logps/rejected": -437.20001221, "logits/chosen": -0.18242188, "logits/rejected": 1.21289062, "nll_loss": 0.38066405, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "4m 39s", "remaining_time": "1m 58s"} +{"eval_loss": 0.41040039, "eval_runtime": 1.3645, "eval_samples_per_second": 2.931, "eval_steps_per_second": 0.733, "eval_rewards/chosen": 14.9375, "eval_rewards/rejected": 0.375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -235.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.3046875, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.41015625, "epoch": 2.10526316, "global_step/max_steps": "80/114", "percentage": "70.18%", "elapsed_time": "4m 41s", "remaining_time": "1m 59s"} +{"loss": 0.33930664, 
"grad_norm": 0.59776307, "learning_rate": 1.676e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.282252, "rewards/chosen": 16.16250038, "rewards/rejected": 0.80849612, "rewards/accuracies": 1.0, "rewards/margins": 15.36250019, "logps/chosen": -250.6000061, "logps/rejected": -473.6000061, "logits/chosen": -0.29282227, "logits/rejected": 1.39687502, "nll_loss": 0.33886719, "epoch": 2.23684211, "global_step/max_steps": "85/114", "percentage": "74.56%", "elapsed_time": "4m 57s", "remaining_time": "1m 41s"} +{"loss": 0.32348328, "grad_norm": 0.29551182, "learning_rate": 1.17e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.285022, "rewards/chosen": 16.23749924, "rewards/rejected": 1.96406245, "rewards/accuracies": 1.0, "rewards/margins": 14.3125, "logps/chosen": -254.3999939, "logps/rejected": -422.0, "logits/chosen": -0.35551757, "logits/rejected": 0.74921876, "nll_loss": 0.32324219, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "5m 12s", "remaining_time": "1m 23s"} +{"eval_loss": 0.40795898, "eval_runtime": 1.3391, "eval_samples_per_second": 2.987, "eval_steps_per_second": 0.747, "eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.5, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.265625, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.40820312, "epoch": 2.36842105, "global_step/max_steps": "90/114", "percentage": "78.95%", "elapsed_time": "5m 13s", "remaining_time": "1m 23s"} +{"loss": 0.41697998, "grad_norm": 0.25565176, "learning_rate": 7.44e-06, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283485, "rewards/chosen": 17.38750076, "rewards/rejected": 0.70209962, "rewards/accuracies": 1.0, "rewards/margins": 16.70000076, "logps/chosen": -312.6000061, "logps/rejected": -527.59997559, "logits/chosen": -0.21904297, "logits/rejected": 1.04218745, "nll_loss": 0.41621095, "epoch": 2.5, "global_step/max_steps": 
"95/114", "percentage": "83.33%", "elapsed_time": "5m 31s", "remaining_time": "1m 6s"} +{"loss": 0.3519104, "grad_norm": 0.4478832, "learning_rate": 4.09e-06, "memory(GiB)": 49.77, "train_speed(iter/s)": 0.28525, "rewards/chosen": 16.46249962, "rewards/rejected": 0.85039061, "rewards/accuracies": 1.0, "rewards/margins": 15.60000038, "logps/chosen": -245.8999939, "logps/rejected": -406.79998779, "logits/chosen": -0.53261721, "logits/rejected": 1.3898437, "nll_loss": 0.35175782, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "5m 46s", "remaining_time": "48s"} +{"eval_loss": 0.40893555, "eval_runtime": 1.3384, "eval_samples_per_second": 2.989, "eval_steps_per_second": 0.747, "eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.6015625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5, "eval_logps/chosen": -234.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.265625, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.40820312, "epoch": 2.63157895, "global_step/max_steps": "100/114", "percentage": "87.72%", "elapsed_time": "5m 48s", "remaining_time": "48s"} +{"loss": 0.3163269, "grad_norm": 0.78974186, "learning_rate": 1.7e-06, "memory(GiB)": 49.77, "train_speed(iter/s)": 0.284651, "rewards/chosen": 16.01250076, "rewards/rejected": 2.20351553, "rewards/accuracies": 1.0, "rewards/margins": 13.80000019, "logps/chosen": -228.3999939, "logps/rejected": -440.79998779, "logits/chosen": -0.31601563, "logits/rejected": 1.13046873, "nll_loss": 0.31601563, "epoch": 2.76315789, "global_step/max_steps": "105/114", "percentage": "92.11%", "elapsed_time": "6m 5s", "remaining_time": "31s"} +{"loss": 0.31457977, "grad_norm": 0.45474076, "learning_rate": 3.4e-07, "memory(GiB)": 49.77, "train_speed(iter/s)": 0.285606, "rewards/chosen": 15.82499981, "rewards/rejected": 2.11406255, "rewards/accuracies": 1.0, "rewards/margins": 13.69999981, "logps/chosen": -212.3999939, "logps/rejected": -443.6000061, 
"logits/chosen": -0.3533203, "logits/rejected": 1.18124998, "nll_loss": 0.31445312, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "6m 21s", "remaining_time": "13s"} +{"eval_loss": 0.40771484, "eval_runtime": 1.3701, "eval_samples_per_second": 2.92, "eval_steps_per_second": 0.73, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.5, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.265625, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.40820312, "epoch": 2.89473684, "global_step/max_steps": "110/114", "percentage": "96.49%", "elapsed_time": "6m 22s", "remaining_time": "13s"} +{"eval_loss": 0.40698242, "eval_runtime": 1.2905, "eval_samples_per_second": 3.1, "eval_steps_per_second": 0.775, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.4765625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.2578125, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.40625, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "6m 39s", "remaining_time": "0s"} +{"train_runtime": 400.4982, "train_samples_per_second": 2.232, "train_steps_per_second": 0.285, "total_flos": 145005522976768.0, "train_loss": 0.58385294, "epoch": 3.0, "global_step/max_steps": "114/114", "percentage": "100.00%", "elapsed_time": "6m 40s", "remaining_time": "0s"} +{"train_dataset": "1698.815436±897.000106, min=182.000000, max=4081.000000, size=298", "val_dataset": "1637.250000±797.581461, min=755.000000, max=2485.000000, size=4", "model_parameter_info": "PeftModelForCausalLM: 7635.8016M Params (20.1851M Trainable [0.2643%]), 0.0001M Buffers.", "last_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114", 
"best_model_checkpoint": "/m2v_intern/wangruotong/logs/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/checkpoint-114", "best_metric": 0.40698242, "global_step": 114, "log_history": [{"loss": 1.12939453125, "grad_norm": 10.70593006577457, "learning_rate": 1.6666666666666667e-05, "memory(GiB)": 6.7, "train_speed(iter/s)": 0.113961, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -444.0, "logps/rejected": -360.0, "logits/chosen": 0.828125, "logits/rejected": -0.283203125, "nll_loss": 0.439453125, "epoch": 0.02631578947368421, "step": 1}, {"loss": 1.8751220703125, "grad_norm": 14.176485007231813, "learning_rate": 8.333333333333334e-05, "memory(GiB)": 16.45, "train_speed(iter/s)": 0.234379, "rewards/chosen": 0.01715087890625, "rewards/rejected": 0.009368896484375, "rewards/accuracies": 0.1875, "rewards/margins": 0.0078125, "logps/chosen": -377.25, "logps/rejected": -512.0, "logits/chosen": -0.1298828125, "logits/rejected": 0.03759765625, "nll_loss": 1.1748046875, "epoch": 0.13157894736842105, "step": 5}, {"loss": 2.12158203125, "grad_norm": 12.015147423707822, "learning_rate": 9.966191788709716e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.254866, "rewards/chosen": 0.702929675579071, "rewards/rejected": 0.2975097596645355, "rewards/accuracies": 0.675000011920929, "rewards/margins": 0.4051757752895355, "logps/chosen": -462.79998779296875, "logps/rejected": -507.20001220703125, "logits/chosen": 0.07973632961511612, "logits/rejected": 0.19414062798023224, "nll_loss": 1.553125023841858, "epoch": 0.2631578947368421, "step": 10}, {"eval_loss": 1.19140625, "eval_runtime": 1.3322, "eval_samples_per_second": 3.002, "eval_steps_per_second": 0.751, "eval_rewards/chosen": 2.40625, "eval_rewards/rejected": 0.92578125, "eval_rewards/accuracies": 0.75, "eval_rewards/margins": 1.4765625, "eval_logps/chosen": -360.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": 
-2.5, "eval_logits/rejected": 1.03125, "eval_nll_loss": 0.796875, "epoch": 0.2631578947368421, "step": 10}, {"loss": 1.001171875, "grad_norm": 9.41426350911575, "learning_rate": 9.829629131445342e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.266928, "rewards/chosen": 3.364062547683716, "rewards/rejected": 1.1257812976837158, "rewards/accuracies": 0.875, "rewards/margins": 2.234375, "logps/chosen": -353.6000061035156, "logps/rejected": -487.6000061035156, "logits/chosen": -0.29057615995407104, "logits/rejected": 0.11259765923023224, "nll_loss": 0.731249988079071, "epoch": 0.39473684210526316, "step": 15}, {"loss": 1.1167236328125, "grad_norm": 3.417766982037976, "learning_rate": 9.591080534401371e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.275171, "rewards/chosen": 6.8125, "rewards/rejected": 3.543750047683716, "rewards/accuracies": 0.925000011920929, "rewards/margins": 3.262500047683716, "logps/chosen": -258.20001220703125, "logps/rejected": -377.6000061035156, "logits/chosen": -0.8324218988418579, "logits/rejected": 0.4007812440395355, "nll_loss": 0.922656238079071, "epoch": 0.5263157894736842, "step": 20}, {"eval_loss": 0.666015625, "eval_runtime": 1.3177, "eval_samples_per_second": 3.036, "eval_steps_per_second": 0.759, "eval_rewards/chosen": 8.6875, "eval_rewards/rejected": 4.78125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 3.9375, "eval_logps/chosen": -298.0, "eval_logps/rejected": -458.0, "eval_logits/chosen": -2.5, "eval_logits/rejected": 1.0859375, "eval_nll_loss": 0.5625, "epoch": 0.5263157894736842, "step": 20}, {"loss": 0.673486328125, "grad_norm": 2.721982515360601, "learning_rate": 9.255583362184999e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.274036, "rewards/chosen": 9.074999809265137, "rewards/rejected": 4.684374809265137, "rewards/accuracies": 1.0, "rewards/margins": 4.393750190734863, "logps/chosen": -373.6000061035156, "logps/rejected": -422.20001220703125, "logits/chosen": -0.3814453184604645, 
"logits/rejected": 0.09628906100988388, "nll_loss": 0.600781261920929, "epoch": 0.6578947368421053, "step": 25}, {"loss": 0.50615234375, "grad_norm": 0.9866084312671491, "learning_rate": 8.83022221559489e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.277362, "rewards/chosen": 10.949999809265137, "rewards/rejected": 4.884375095367432, "rewards/accuracies": 1.0, "rewards/margins": 6.068749904632568, "logps/chosen": -250.6999969482422, "logps/rejected": -412.79998779296875, "logits/chosen": -0.5782226324081421, "logits/rejected": 0.4605468809604645, "nll_loss": 0.48750001192092896, "epoch": 0.7894736842105263, "step": 30}, {"eval_loss": 0.49609375, "eval_runtime": 1.3623, "eval_samples_per_second": 2.936, "eval_steps_per_second": 0.734, "eval_rewards/chosen": 11.3125, "eval_rewards/rejected": 3.78125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 7.5, "eval_logps/chosen": -272.0, "eval_logps/rejected": -468.0, "eval_logits/chosen": -2.078125, "eval_logits/rejected": 1.203125, "eval_nll_loss": 0.49609375, "epoch": 0.7894736842105263, "step": 30}, {"loss": 0.4916259765625, "grad_norm": 1.2141834186198552, "learning_rate": 8.323979328069689e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.275044, "rewards/chosen": 12.012499809265137, "rewards/rejected": 3.543750047683716, "rewards/accuracies": 1.0, "rewards/margins": 8.487500190734863, "logps/chosen": -379.20001220703125, "logps/rejected": -446.0, "logits/chosen": -0.512499988079071, "logits/rejected": 0.31171876192092896, "nll_loss": 0.4892578125, "epoch": 0.9210526315789473, "step": 35}, {"loss": 0.45865478515625, "grad_norm": 1.0755142345948945, "learning_rate": 7.74754489035403e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.278234, "rewards/chosen": 11.9375, "rewards/rejected": 2.1859374046325684, "rewards/accuracies": 1.0, "rewards/margins": 9.762499809265137, "logps/chosen": -271.3999938964844, "logps/rejected": -482.3999938964844, "logits/chosen": -0.24981689453125, "logits/rejected": 
0.541796863079071, "nll_loss": 0.563281238079071, "epoch": 1.0526315789473684, "step": 40}, {"eval_loss": 0.4462890625, "eval_runtime": 1.291, "eval_samples_per_second": 3.098, "eval_steps_per_second": 0.775, "eval_rewards/chosen": 13.125, "eval_rewards/rejected": 1.0, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 12.125, "eval_logps/chosen": -253.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.7734375, "eval_logits/rejected": 1.65625, "eval_nll_loss": 0.4453125, "epoch": 1.0526315789473684, "step": 40}, {"loss": 0.44638671875, "grad_norm": 0.6599652846962414, "learning_rate": 7.113091308703498e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.276176, "rewards/chosen": 14.050000190734863, "rewards/rejected": 1.506250023841858, "rewards/accuracies": 1.0, "rewards/margins": 12.537500381469727, "logps/chosen": -306.20001220703125, "logps/rejected": -480.0, "logits/chosen": -0.24863281846046448, "logits/rejected": 0.8564453125, "nll_loss": 0.44648438692092896, "epoch": 1.1842105263157894, "step": 45}, {"loss": 0.434783935546875, "grad_norm": 0.5744853865576506, "learning_rate": 6.434016163555452e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.279108, "rewards/chosen": 14.8125, "rewards/rejected": 2.0621094703674316, "rewards/accuracies": 1.0, "rewards/margins": 12.762499809265137, "logps/chosen": -300.20001220703125, "logps/rejected": -461.6000061035156, "logits/chosen": 0.05844726413488388, "logits/rejected": 1.05859375, "nll_loss": 0.4339843690395355, "epoch": 1.3157894736842106, "step": 50}, {"eval_loss": 0.425537109375, "eval_runtime": 1.3278, "eval_samples_per_second": 3.013, "eval_steps_per_second": 0.753, "eval_rewards/chosen": 14.25, "eval_rewards/rejected": 0.9765625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.25, "eval_logps/chosen": -242.0, "eval_logps/rejected": -496.0, "eval_logits/chosen": -1.5546875, "eval_logits/rejected": 1.9296875, "eval_nll_loss": 0.42578125, "epoch": 1.3157894736842106, "step": 50}, 
{"loss": 0.40357666015625, "grad_norm": 0.2926328226891597, "learning_rate": 5.724659296536233e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.280714, "rewards/chosen": 14.324999809265137, "rewards/rejected": 1.704687476158142, "rewards/accuracies": 1.0, "rewards/margins": 12.612500190734863, "logps/chosen": -261.20001220703125, "logps/rejected": -431.20001220703125, "logits/chosen": -0.1898193359375, "logits/rejected": 1.259374976158142, "nll_loss": 0.40234375, "epoch": 1.4473684210526316, "step": 55}, {"loss": 0.28531494140625, "grad_norm": 0.24140281258606347, "learning_rate": 5e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283696, "rewards/chosen": 15.337499618530273, "rewards/rejected": 2.5546875, "rewards/accuracies": 1.0, "rewards/margins": 12.774999618530273, "logps/chosen": -263.3999938964844, "logps/rejected": -405.20001220703125, "logits/chosen": -0.07255859673023224, "logits/rejected": 0.758007824420929, "nll_loss": 0.28496092557907104, "epoch": 1.5789473684210527, "step": 60}, {"eval_loss": 0.425048828125, "eval_runtime": 1.3237, "eval_samples_per_second": 3.022, "eval_steps_per_second": 0.755, "eval_rewards/chosen": 14.375, "eval_rewards/rejected": 0.80078125, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 13.5625, "eval_logps/chosen": -241.0, "eval_logps/rejected": -498.0, "eval_logits/chosen": -1.40625, "eval_logits/rejected": 1.96875, "eval_nll_loss": 0.42578125, "epoch": 1.5789473684210527, "step": 60}, {"loss": 0.3310546875, "grad_norm": 0.6094457836784298, "learning_rate": 4.275340703463767e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283196, "rewards/chosen": 15.774999618530273, "rewards/rejected": 1.736328125, "rewards/accuracies": 1.0, "rewards/margins": 14.050000190734863, "logps/chosen": -261.0, "logps/rejected": -409.6000061035156, "logits/chosen": -0.5101562738418579, "logits/rejected": 1.0482909679412842, "nll_loss": 0.33125001192092896, "epoch": 1.7105263157894737, "step": 65}, {"loss": 0.32975921630859373, 
"grad_norm": 0.45260618201827363, "learning_rate": 3.5659838364445505e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283818, "rewards/chosen": 14.487500190734863, "rewards/rejected": 2.176562547683716, "rewards/accuracies": 1.0, "rewards/margins": 12.287500381469727, "logps/chosen": -193.0, "logps/rejected": -501.6000061035156, "logits/chosen": 0.03581542894244194, "logits/rejected": 1.0457031726837158, "nll_loss": 0.3296875059604645, "epoch": 1.8421052631578947, "step": 70}, {"eval_loss": 0.41455078125, "eval_runtime": 1.3685, "eval_samples_per_second": 2.923, "eval_steps_per_second": 0.731, "eval_rewards/chosen": 14.75, "eval_rewards/rejected": 0.6796875, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.0625, "eval_logps/chosen": -237.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.3203125, "eval_logits/rejected": 1.9765625, "eval_nll_loss": 0.4140625, "epoch": 1.8421052631578947, "step": 70}, {"loss": 0.40250244140625, "grad_norm": 0.7812661467833826, "learning_rate": 2.886908691296504e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.283316, "rewards/chosen": 15.800000190734863, "rewards/rejected": 0.692187488079071, "rewards/accuracies": 1.0, "rewards/margins": 15.125, "logps/chosen": -298.0, "logps/rejected": -456.79998779296875, "logits/chosen": -0.42255860567092896, "logits/rejected": 1.28125, "nll_loss": 0.4027343690395355, "epoch": 1.973684210526316, "step": 75}, {"loss": 0.3640625, "grad_norm": 0.5488588034556375, "learning_rate": 2.25245510964597e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.282344, "rewards/chosen": 15.3125, "rewards/rejected": 1.3738281726837158, "rewards/accuracies": 1.0, "rewards/margins": 13.925000190734863, "logps/chosen": -239.39999389648438, "logps/rejected": -437.20001220703125, "logits/chosen": -0.18242187798023224, "logits/rejected": 1.212890625, "nll_loss": 0.38066405057907104, "epoch": 2.1052631578947367, "step": 80}, {"eval_loss": 0.410400390625, "eval_runtime": 1.3645, 
"eval_samples_per_second": 2.931, "eval_steps_per_second": 0.733, "eval_rewards/chosen": 14.9375, "eval_rewards/rejected": 0.375, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -235.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.3046875, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.41015625, "epoch": 2.1052631578947367, "step": 80}, {"loss": 0.339306640625, "grad_norm": 0.5977630685878709, "learning_rate": 1.6760206719303105e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.282252, "rewards/chosen": 16.162500381469727, "rewards/rejected": 0.8084961175918579, "rewards/accuracies": 1.0, "rewards/margins": 15.362500190734863, "logps/chosen": -250.60000610351562, "logps/rejected": -473.6000061035156, "logits/chosen": -0.2928222715854645, "logits/rejected": 1.396875023841858, "nll_loss": 0.3388671875, "epoch": 2.236842105263158, "step": 85}, {"loss": 0.3234832763671875, "grad_norm": 0.2955118166701661, "learning_rate": 1.1697777844051105e-05, "memory(GiB)": 40.52, "train_speed(iter/s)": 0.285022, "rewards/chosen": 16.237499237060547, "rewards/rejected": 1.9640624523162842, "rewards/accuracies": 1.0, "rewards/margins": 14.3125, "logps/chosen": -254.39999389648438, "logps/rejected": -422.0, "logits/chosen": -0.35551756620407104, "logits/rejected": 0.749218761920929, "nll_loss": 0.3232421875, "epoch": 2.3684210526315788, "step": 90}, {"eval_loss": 0.407958984375, "eval_runtime": 1.3391, "eval_samples_per_second": 2.987, "eval_steps_per_second": 0.747, "eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.5, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.265625, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.408203125, "epoch": 2.3684210526315788, "step": 90}, {"loss": 0.41697998046875, "grad_norm": 0.25565176186037897, "learning_rate": 7.444166378150013e-06, "memory(GiB)": 40.52, "train_speed(iter/s)": 
0.283485, "rewards/chosen": 17.387500762939453, "rewards/rejected": 0.702099621295929, "rewards/accuracies": 1.0, "rewards/margins": 16.700000762939453, "logps/chosen": -312.6000061035156, "logps/rejected": -527.5999755859375, "logits/chosen": -0.21904297173023224, "logits/rejected": 1.0421874523162842, "nll_loss": 0.41621094942092896, "epoch": 2.5, "step": 95}, {"loss": 0.351910400390625, "grad_norm": 0.44788320009382443, "learning_rate": 4.089194655986306e-06, "memory(GiB)": 49.77, "train_speed(iter/s)": 0.28525, "rewards/chosen": 16.462499618530273, "rewards/rejected": 0.850390613079071, "rewards/accuracies": 1.0, "rewards/margins": 15.600000381469727, "logps/chosen": -245.89999389648438, "logps/rejected": -406.79998779296875, "logits/chosen": -0.5326172113418579, "logits/rejected": 1.3898437023162842, "nll_loss": 0.35175782442092896, "epoch": 2.6315789473684212, "step": 100}, {"eval_loss": 0.408935546875, "eval_runtime": 1.3384, "eval_samples_per_second": 2.989, "eval_steps_per_second": 0.747, "eval_rewards/chosen": 15.0625, "eval_rewards/rejected": 0.6015625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5, "eval_logps/chosen": -234.0, "eval_logps/rejected": -500.0, "eval_logits/chosen": -1.265625, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.408203125, "epoch": 2.6315789473684212, "step": 100}, {"loss": 0.316326904296875, "grad_norm": 0.7897418622893607, "learning_rate": 1.70370868554659e-06, "memory(GiB)": 49.77, "train_speed(iter/s)": 0.284651, "rewards/chosen": 16.012500762939453, "rewards/rejected": 2.2035155296325684, "rewards/accuracies": 1.0, "rewards/margins": 13.800000190734863, "logps/chosen": -228.39999389648438, "logps/rejected": -440.79998779296875, "logits/chosen": -0.3160156309604645, "logits/rejected": 1.130468726158142, "nll_loss": 0.3160156309604645, "epoch": 2.763157894736842, "step": 105}, {"loss": 0.31457977294921874, "grad_norm": 0.45474075769771194, "learning_rate": 3.380821129028489e-07, "memory(GiB)": 49.77, 
"train_speed(iter/s)": 0.285606, "rewards/chosen": 15.824999809265137, "rewards/rejected": 2.114062547683716, "rewards/accuracies": 1.0, "rewards/margins": 13.699999809265137, "logps/chosen": -212.39999389648438, "logps/rejected": -443.6000061035156, "logits/chosen": -0.35332030057907104, "logits/rejected": 1.181249976158142, "nll_loss": 0.314453125, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.40771484375, "eval_runtime": 1.3701, "eval_samples_per_second": 2.92, "eval_steps_per_second": 0.73, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.5, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.5625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.265625, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.408203125, "epoch": 2.8947368421052633, "step": 110}, {"eval_loss": 0.406982421875, "eval_runtime": 1.2905, "eval_samples_per_second": 3.1, "eval_steps_per_second": 0.775, "eval_rewards/chosen": 15.125, "eval_rewards/rejected": 0.4765625, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 14.625, "eval_logps/chosen": -234.0, "eval_logps/rejected": -502.0, "eval_logits/chosen": -1.2578125, "eval_logits/rejected": 2.0, "eval_nll_loss": 0.40625, "epoch": 3.0, "step": 114}, {"train_runtime": 400.4982, "train_samples_per_second": 2.232, "train_steps_per_second": 0.285, "total_flos": 145005522976768.0, "train_loss": 0.5838529352556195, "epoch": 3.0, "step": 114}], "memory": 49.7734375} diff --git a/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs/events.out.tfevents.1739624021.kml-task-540432-record-10144729-prod-worker-0.14199.0 b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs/events.out.tfevents.1739624021.kml-task-540432-record-10144729-prod-worker-0.14199.0 new file mode 100644 index 0000000000000000000000000000000000000000..3dc91b71a7d3ab6de6f6c406bd97fad7f356a4ef --- /dev/null +++ 
b/output_deepseek_dpo_215/deepseek-r1-7b_400_0.5_dpo_4096_rank8_epoch3_what/v0-20250215-125144/runs/events.out.tfevents.1739624021.kml-task-540432-record-10144729-prod-worker-0.14199.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d95f41f686db371200fc462f4e663c24fb1c5acdffc826624030142b2fb92cc +size 36883