qwen15_code130k_step250 / run_config.json
cdomingoenrich's picture
Upload model checkpoint (+run_config.json)
399e182 verified
{
"args": {
"actor_freeze": 0,
"actor_init_on_gpu": false,
"actor_learning_rate": 1e-05,
"actor_lr_warmup_ratio": null,
"actor_num_gpus_per_node": 1,
"actor_num_nodes": 1,
"actor_train_period": null,
"adam_betas": [
0.9,
0.95
],
"adam_offload": true,
"advantage_estimator": "rloo",
"agent_func_path": null,
"apply_chat_template": false,
"async_train": false,
"aux_loss_coef": 0,
"bf16": true,
"bigcodebench_max_samples": 1140,
"bleu_n_gram": 1,
"ce_horizon": null,
"ckpt_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False/ckpt",
"classifier_sequence_selection": "closest",
"colocate_actor_ref": false,
"colocate_all_models": true,
"colocate_critic_reward": false,
"colocate_reward_models": false,
"context_max_len": 2,
"critic_backbone_freeze": 0,
"critic_bradley_terry_loss_coef": 0.0,
"critic_ce_loss_coef": 0.0,
"critic_class_loss_coef": 0.0,
"critic_embedding_infonce_loss_coef": 0.0,
"critic_infonce_loss_coef": 1.0,
"critic_learning_rate": 0.0,
"critic_loss_choice": "log",
"critic_lr_head": 0.0,
"critic_lr_scheduler": "constant_with_warmup",
"critic_lr_warmup_ratio": null,
"critic_mom_loss_coef": 0.0,
"critic_mom_sequence_level": "last_token",
"critic_num_gpus_per_node": 1,
"critic_num_nodes": 1,
"critic_pretrain": "Qwen/Qwen2.5-1.5B",
"critic_reward_horizon": null,
"critic_reward_start": null,
"critic_reward_target": 0.0,
"critic_sequence_level": "last_token",
"critic_train_interval": 1,
"critic_train_period": null,
"debug": false,
"deepcompile": false,
"deepspeed_enable_sleep": false,
"disable_ds_ckpt": true,
"disable_fast_tokenizer": false,
"document_masking": false,
"ds_tensor_parallel_size": 1,
"dual_clip": null,
"dynamic_filtering": false,
"dynamic_filtering_reward_range": [
0,
1
],
"ema_beta": 0.9,
"embed_method": "last_token",
"embedding_infonce_hard_negative_k": 0,
"embedding_infonce_ignore_sim_threshold": null,
"embedding_infonce_ignore_top_k": 1,
"embedding_infonce_min_negatives": 0,
"embedding_infonce_mismatched_real_k": 0,
"embedding_infonce_temperature": 0.1,
"enable_ema": true,
"enable_prefix_caching": false,
"enforce_eager": false,
"entropy_loss_coef": null,
"eps_clip": 0.2,
"eps_clip_low_high": [
0.2,
0.2
],
"eval_batch_size": 16,
"eval_bertscore_batch_size": 16,
"eval_bertscore_model": "roberta-base",
"eval_dataset": "sjelassi/opencode-instruct_130k",
"eval_detox_device": "",
"eval_detox_fl_model": "cointegrated/roberta-large-cola-krishna2020",
"eval_detox_sim_model": "sentence-transformers/LaBSE",
"eval_detox_sta_model": "s-nlp/roberta_toxicity_classifier",
"eval_down_batch_size": 128,
"eval_down_max_samples": 128,
"eval_down_steps": 50,
"eval_factuality_batch_size": 16,
"eval_factuality_device": "cuda",
"eval_factuality_entailment_threshold": 0.5,
"eval_factuality_max_length": 512,
"eval_factuality_max_sentences": null,
"eval_factuality_metric": "none",
"eval_factuality_model": "roberta-large-mnli",
"eval_factuality_truncation": "only_first",
"eval_generate_max_len": 512,
"eval_max_samples": 128,
"eval_mt_batch_size": 1,
"eval_mt_max_samples": 8,
"eval_mt_steps": -1,
"eval_n_samples_per_prompt": 4,
"eval_n_samples_per_prompt_down": 4,
"eval_n_samples_per_prompt_mt": 100,
"eval_split": "test",
"eval_steps": -1,
"eval_style_transfer_log_samples": 0,
"eval_summarization_log_samples": 0,
"eval_temperature": 1.0,
"eval_temperature_down": 1.0,
"eval_temperature_mt": 1.0,
"eval_translation_log_samples": 0,
"flash_attn": false,
"freezing_actor_steps": -1,
"full_determinism": false,
"gamma": 1,
"generate_max_len": 2,
"grad_accum_dtype": null,
"gradient_checkpointing": true,
"gradient_checkpointing_use_reentrant": false,
"hidden_state_method": "concat",
"humaneval_max_samples": 164,
"init_ce_coef": 1.0,
"init_kl_coef": 0.0,
"input_key": "question",
"input_template": null,
"keep_critic_on": false,
"kl_estimator": "k2",
"kl_horizon": null,
"kl_target": null,
"l2": 0.0,
"label_key": "answer",
"lambd": 1,
"load_actor_checkpoint": false,
"load_critic_checkpoint": false,
"load_in_4bit": false,
"local_rank": -1,
"log_gradients": true,
"logging_steps": 1,
"lora_alpha": 16,
"lora_dropout": 0,
"lora_rank": 0,
"lr_scheduler": "constant_with_warmup",
"lr_warmup_ratio": 0.03,
"max_ckpt_mem": 100000000.0,
"max_ckpt_num": 3,
"max_epochs": 1,
"max_len": null,
"max_norm": 1.0,
"max_samples": -1,
"mbpp_max_samples": 974,
"micro_reward_batch_size": 8,
"micro_rollout_batch_size": 8,
"micro_train_batch_size": 8,
"mom_reward_target": 1.0,
"n_samples_per_prompt": 4,
"no_advantage_std_norm": false,
"normalize_reward": false,
"num_episodes": 1,
"output_key": "answer",
"overlap_comm": false,
"overlong_buffer_len": null,
"overlong_penalty_factor": 1,
"packing_samples": false,
"perf": false,
"policy_loss_type": "ppo",
"pos_rew_coef": 1.0,
"pretrain": "Qwen/Qwen2.5-1.5B",
"pretrain_mode": true,
"prompt_data": "sjelassi/opencode-instruct_130k",
"prompt_data_probs": null,
"prompt_max_len": 1024,
"prompt_split": "train",
"ptx_coef": 0.05,
"qa_masking": false,
"ref_num_gpus_per_node": 1,
"ref_num_nodes": 1,
"ref_reward_offload": false,
"reinit_critic": false,
"remote_rm_url": null,
"reward_choice": "gan",
"reward_clip_range": [
-10,
10
],
"reward_num_gpus_per_node": 1,
"reward_num_nodes": 1,
"reward_pretrain": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"rff_factor": 1.0,
"rff_features": 1024,
"ring_attn_size": 1,
"ring_head_stride": 1,
"rl_horizon": null,
"rl_start": null,
"rl_target": 0.0,
"rollout_batch_size": 16,
"rollout_max_tokens_per_gpu": null,
"save_hf_ckpt": true,
"save_log_scale_count": -1,
"save_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False",
"save_steps": 250,
"save_value_network": false,
"seed": 43,
"slurm_job": "None_0",
"stride": 2,
"target_modules": "all-linear",
"temperature": 1.0,
"top_p": 1.0,
"train_batch_size": 64,
"train_max_tokens_per_gpu": 16192,
"use_ds_universal_ckpt": false,
"use_dynamic_batch": false,
"use_kl_loss": true,
"use_liger_kernel": false,
"use_ms": false,
"use_rff_kernel": true,
"use_spectral_norm": false,
"use_tensorboard": null,
"use_wandb": "<omitted>",
"use_whitening": false,
"use_whitening_critic": false,
"value_clip": 0.5,
"value_head_prefix": "score",
"vllm_enable_sleep": false,
"vllm_generate_batch_size": 16,
"vllm_gpu_memory_utilization": 0.95,
"vllm_num_engines": null,
"vllm_sync_backend": "nccl",
"vllm_sync_with_ray": false,
"vllm_tensor_parallel_size": 1,
"wandb_group": null,
"wandb_org": null,
"wandb_project": "openrlhf_carles_runs",
"wandb_run_name": "ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False",
"zero_stage": 2,
"zpg": 1
},
"client_states": {
"data_loader_state_dict": "<omitted>",
"episode": 0,
"global_step": 250
},
"cwd": "/data/ebm_openrlhf",
"hostname": "ebm11-0-worker-0",
"tag": "global_step250",
"timestamp": "2026-01-09T10:40:50.833634+00:00"
}