| { | |
| "args": { | |
| "actor_freeze": 0, | |
| "actor_init_on_gpu": false, | |
| "actor_learning_rate": 1e-05, | |
| "actor_lr_warmup_ratio": null, | |
| "actor_num_gpus_per_node": 1, | |
| "actor_num_nodes": 1, | |
| "actor_train_period": null, | |
| "adam_betas": [ | |
| 0.9, | |
| 0.95 | |
| ], | |
| "adam_offload": true, | |
| "advantage_estimator": "rloo", | |
| "agent_func_path": null, | |
| "apply_chat_template": false, | |
| "async_train": false, | |
| "aux_loss_coef": 0, | |
| "bf16": true, | |
| "bigcodebench_max_samples": 1140, | |
| "bleu_n_gram": 1, | |
| "ce_horizon": null, | |
| "ckpt_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False/ckpt", | |
| "classifier_sequence_selection": "closest", | |
| "colocate_actor_ref": false, | |
| "colocate_all_models": true, | |
| "colocate_critic_reward": false, | |
| "colocate_reward_models": false, | |
| "context_max_len": 2, | |
| "critic_backbone_freeze": 0, | |
| "critic_bradley_terry_loss_coef": 0.0, | |
| "critic_ce_loss_coef": 0.0, | |
| "critic_class_loss_coef": 0.0, | |
| "critic_embedding_infonce_loss_coef": 0.0, | |
| "critic_infonce_loss_coef": 1.0, | |
| "critic_learning_rate": 0.0, | |
| "critic_loss_choice": "log", | |
| "critic_lr_head": 0.0, | |
| "critic_lr_scheduler": "constant_with_warmup", | |
| "critic_lr_warmup_ratio": null, | |
| "critic_mom_loss_coef": 0.0, | |
| "critic_mom_sequence_level": "last_token", | |
| "critic_num_gpus_per_node": 1, | |
| "critic_num_nodes": 1, | |
| "critic_pretrain": "Qwen/Qwen2.5-1.5B", | |
| "critic_reward_horizon": null, | |
| "critic_reward_start": null, | |
| "critic_reward_target": 0.0, | |
| "critic_sequence_level": "last_token", | |
| "critic_train_interval": 1, | |
| "critic_train_period": null, | |
| "debug": false, | |
| "deepcompile": false, | |
| "deepspeed_enable_sleep": false, | |
| "disable_ds_ckpt": true, | |
| "disable_fast_tokenizer": false, | |
| "document_masking": false, | |
| "ds_tensor_parallel_size": 1, | |
| "dual_clip": null, | |
| "dynamic_filtering": false, | |
| "dynamic_filtering_reward_range": [ | |
| 0, | |
| 1 | |
| ], | |
| "ema_beta": 0.9, | |
| "embed_method": "last_token", | |
| "embedding_infonce_hard_negative_k": 0, | |
| "embedding_infonce_ignore_sim_threshold": null, | |
| "embedding_infonce_ignore_top_k": 1, | |
| "embedding_infonce_min_negatives": 0, | |
| "embedding_infonce_mismatched_real_k": 0, | |
| "embedding_infonce_temperature": 0.1, | |
| "enable_ema": true, | |
| "enable_prefix_caching": false, | |
| "enforce_eager": false, | |
| "entropy_loss_coef": null, | |
| "eps_clip": 0.2, | |
| "eps_clip_low_high": [ | |
| 0.2, | |
| 0.2 | |
| ], | |
| "eval_batch_size": 16, | |
| "eval_bertscore_batch_size": 16, | |
| "eval_bertscore_model": "roberta-base", | |
| "eval_dataset": "sjelassi/opencode-instruct_130k", | |
| "eval_detox_device": "", | |
| "eval_detox_fl_model": "cointegrated/roberta-large-cola-krishna2020", | |
| "eval_detox_sim_model": "sentence-transformers/LaBSE", | |
| "eval_detox_sta_model": "s-nlp/roberta_toxicity_classifier", | |
| "eval_down_batch_size": 128, | |
| "eval_down_max_samples": 128, | |
| "eval_down_steps": 50, | |
| "eval_factuality_batch_size": 16, | |
| "eval_factuality_device": "cuda", | |
| "eval_factuality_entailment_threshold": 0.5, | |
| "eval_factuality_max_length": 512, | |
| "eval_factuality_max_sentences": null, | |
| "eval_factuality_metric": "none", | |
| "eval_factuality_model": "roberta-large-mnli", | |
| "eval_factuality_truncation": "only_first", | |
| "eval_generate_max_len": 512, | |
| "eval_max_samples": 128, | |
| "eval_mt_batch_size": 1, | |
| "eval_mt_max_samples": 8, | |
| "eval_mt_steps": -1, | |
| "eval_n_samples_per_prompt": 4, | |
| "eval_n_samples_per_prompt_down": 4, | |
| "eval_n_samples_per_prompt_mt": 100, | |
| "eval_split": "test", | |
| "eval_steps": -1, | |
| "eval_style_transfer_log_samples": 0, | |
| "eval_summarization_log_samples": 0, | |
| "eval_temperature": 1.0, | |
| "eval_temperature_down": 1.0, | |
| "eval_temperature_mt": 1.0, | |
| "eval_translation_log_samples": 0, | |
| "flash_attn": false, | |
| "freezing_actor_steps": -1, | |
| "full_determinism": false, | |
| "gamma": 1, | |
| "generate_max_len": 2, | |
| "grad_accum_dtype": null, | |
| "gradient_checkpointing": true, | |
| "gradient_checkpointing_use_reentrant": false, | |
| "hidden_state_method": "concat", | |
| "humaneval_max_samples": 164, | |
| "init_ce_coef": 1.0, | |
| "init_kl_coef": 0.0, | |
| "input_key": "question", | |
| "input_template": null, | |
| "keep_critic_on": false, | |
| "kl_estimator": "k2", | |
| "kl_horizon": null, | |
| "kl_target": null, | |
| "l2": 0.0, | |
| "label_key": "answer", | |
| "lambd": 1, | |
| "load_actor_checkpoint": false, | |
| "load_critic_checkpoint": false, | |
| "load_in_4bit": false, | |
| "local_rank": -1, | |
| "log_gradients": true, | |
| "logging_steps": 1, | |
| "lora_alpha": 16, | |
| "lora_dropout": 0, | |
| "lora_rank": 0, | |
| "lr_scheduler": "constant_with_warmup", | |
| "lr_warmup_ratio": 0.03, | |
| "max_ckpt_mem": 100000000.0, | |
| "max_ckpt_num": 3, | |
| "max_epochs": 1, | |
| "max_len": null, | |
| "max_norm": 1.0, | |
| "max_samples": -1, | |
| "mbpp_max_samples": 974, | |
| "micro_reward_batch_size": 8, | |
| "micro_rollout_batch_size": 8, | |
| "micro_train_batch_size": 8, | |
| "mom_reward_target": 1.0, | |
| "n_samples_per_prompt": 4, | |
| "no_advantage_std_norm": false, | |
| "normalize_reward": false, | |
| "num_episodes": 1, | |
| "output_key": "answer", | |
| "overlap_comm": false, | |
| "overlong_buffer_len": null, | |
| "overlong_penalty_factor": 1, | |
| "packing_samples": false, | |
| "perf": false, | |
| "policy_loss_type": "ppo", | |
| "pos_rew_coef": 1.0, | |
| "pretrain": "Qwen/Qwen2.5-1.5B", | |
| "pretrain_mode": true, | |
| "prompt_data": "sjelassi/opencode-instruct_130k", | |
| "prompt_data_probs": null, | |
| "prompt_max_len": 1024, | |
| "prompt_split": "train", | |
| "ptx_coef": 0.05, | |
| "qa_masking": false, | |
| "ref_num_gpus_per_node": 1, | |
| "ref_num_nodes": 1, | |
| "ref_reward_offload": false, | |
| "reinit_critic": false, | |
| "remote_rm_url": null, | |
| "reward_choice": "gan", | |
| "reward_clip_range": [ | |
| -10, | |
| 10 | |
| ], | |
| "reward_num_gpus_per_node": 1, | |
| "reward_num_nodes": 1, | |
| "reward_pretrain": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", | |
| "rff_factor": 1.0, | |
| "rff_features": 1024, | |
| "ring_attn_size": 1, | |
| "ring_head_stride": 1, | |
| "rl_horizon": null, | |
| "rl_start": null, | |
| "rl_target": 0.0, | |
| "rollout_batch_size": 16, | |
| "rollout_max_tokens_per_gpu": null, | |
| "save_hf_ckpt": true, | |
| "save_log_scale_count": -1, | |
| "save_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False", | |
| "save_steps": 250, | |
| "save_value_network": false, | |
| "seed": 43, | |
| "slurm_job": "None_0", | |
| "stride": 2, | |
| "target_modules": "all-linear", | |
| "temperature": 1.0, | |
| "top_p": 1.0, | |
| "train_batch_size": 64, | |
| "train_max_tokens_per_gpu": 16192, | |
| "use_ds_universal_ckpt": false, | |
| "use_dynamic_batch": false, | |
| "use_kl_loss": true, | |
| "use_liger_kernel": false, | |
| "use_ms": false, | |
| "use_rff_kernel": true, | |
| "use_spectral_norm": false, | |
| "use_tensorboard": null, | |
| "use_wandb": "629a07f37adb439bb40b4f10d84afe378a0a30ca", | |
| "use_whitening": false, | |
| "use_whitening_critic": false, | |
| "value_clip": 0.5, | |
| "value_head_prefix": "score", | |
| "vllm_enable_sleep": false, | |
| "vllm_generate_batch_size": 16, | |
| "vllm_gpu_memory_utilization": 0.95, | |
| "vllm_num_engines": null, | |
| "vllm_sync_backend": "nccl", | |
| "vllm_sync_with_ray": false, | |
| "vllm_tensor_parallel_size": 1, | |
| "wandb_group": null, | |
| "wandb_org": null, | |
| "wandb_project": "openrlhf_carles_runs", | |
| "wandb_run_name": "ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False", | |
| "zero_stage": 2, | |
| "zpg": 1 | |
| }, | |
| "client_states": { | |
| "data_loader_state_dict": "<omitted>", | |
| "episode": 0, | |
| "global_step": 500 | |
| }, | |
| "cwd": "/data/ebm_openrlhf", | |
| "hostname": "ebm11-0-worker-0", | |
| "tag": "global_step500", | |
| "timestamp": "2026-01-09T14:25:16.670956+00:00" | |
| } | |