{ "args": { "actor_freeze": 0, "actor_init_on_gpu": false, "actor_learning_rate": 1e-05, "actor_lr_warmup_ratio": null, "actor_num_gpus_per_node": 1, "actor_num_nodes": 1, "actor_train_period": null, "adam_betas": [ 0.9, 0.95 ], "adam_offload": true, "advantage_estimator": "rloo", "agent_func_path": null, "apply_chat_template": false, "async_train": false, "aux_loss_coef": 0, "bf16": true, "bigcodebench_max_samples": 1140, "bleu_n_gram": 1, "ce_horizon": null, "ckpt_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False/ckpt", "classifier_sequence_selection": "closest", "colocate_actor_ref": false, "colocate_all_models": true, "colocate_critic_reward": false, "colocate_reward_models": false, "context_max_len": 2, "critic_backbone_freeze": 0, "critic_bradley_terry_loss_coef": 0.0, "critic_ce_loss_coef": 0.0, "critic_class_loss_coef": 0.0, "critic_embedding_infonce_loss_coef": 0.0, "critic_infonce_loss_coef": 1.0, "critic_learning_rate": 0.0, "critic_loss_choice": "log", "critic_lr_head": 0.0, "critic_lr_scheduler": "constant_with_warmup", "critic_lr_warmup_ratio": null, "critic_mom_loss_coef": 0.0, "critic_mom_sequence_level": "last_token", "critic_num_gpus_per_node": 1, "critic_num_nodes": 1, "critic_pretrain": "Qwen/Qwen2.5-1.5B", "critic_reward_horizon": null, "critic_reward_start": null, "critic_reward_target": 0.0, "critic_sequence_level": "last_token", "critic_train_interval": 1, "critic_train_period": null, "debug": false, "deepcompile": false, "deepspeed_enable_sleep": false, "disable_ds_ckpt": true, "disable_fast_tokenizer": false, "document_masking": false, "ds_tensor_parallel_size": 1, "dual_clip": null, "dynamic_filtering": false, "dynamic_filtering_reward_range": [ 0, 1 ], "ema_beta": 0.9, "embed_method": "last_token", "embedding_infonce_hard_negative_k": 0, "embedding_infonce_ignore_sim_threshold": null, "embedding_infonce_ignore_top_k": 1, "embedding_infonce_min_negatives": 0, "embedding_infonce_mismatched_real_k": 0, "embedding_infonce_temperature": 0.1, "enable_ema": true, "enable_prefix_caching": false, "enforce_eager": false, "entropy_loss_coef": null, "eps_clip": 0.2, "eps_clip_low_high": [ 0.2, 0.2 ], "eval_batch_size": 16, "eval_bertscore_batch_size": 16, "eval_bertscore_model": "roberta-base", "eval_dataset": "sjelassi/opencode-instruct_130k", "eval_detox_device": "", "eval_detox_fl_model": "cointegrated/roberta-large-cola-krishna2020", "eval_detox_sim_model": "sentence-transformers/LaBSE", "eval_detox_sta_model": "s-nlp/roberta_toxicity_classifier", "eval_down_batch_size": 128, "eval_down_max_samples": 128, "eval_down_steps": 50, "eval_factuality_batch_size": 16, "eval_factuality_device": "cuda", "eval_factuality_entailment_threshold": 0.5, "eval_factuality_max_length": 512, "eval_factuality_max_sentences": null, "eval_factuality_metric": "none", "eval_factuality_model": "roberta-large-mnli", "eval_factuality_truncation": "only_first", "eval_generate_max_len": 512, "eval_max_samples": 128, "eval_mt_batch_size": 1, "eval_mt_max_samples": 8, "eval_mt_steps": -1, "eval_n_samples_per_prompt": 4, "eval_n_samples_per_prompt_down": 4, "eval_n_samples_per_prompt_mt": 100, "eval_split": "test", "eval_steps": -1, "eval_style_transfer_log_samples": 0, "eval_summarization_log_samples": 0, "eval_temperature": 1.0, "eval_temperature_down": 1.0, "eval_temperature_mt": 1.0, "eval_translation_log_samples": 0, "flash_attn": false, "freezing_actor_steps": -1, "full_determinism": false, "gamma": 1, "generate_max_len": 2, "grad_accum_dtype": null, "gradient_checkpointing": true, "gradient_checkpointing_use_reentrant": false, "hidden_state_method": "concat", "humaneval_max_samples": 164, "init_ce_coef": 1.0, "init_kl_coef": 0.0, "input_key": "question", "input_template": null, "keep_critic_on": false, "kl_estimator": "k2", "kl_horizon": null, "kl_target": null, "l2": 0.0, "label_key": "answer", "lambd": 1, "load_actor_checkpoint": false, "load_critic_checkpoint": false, "load_in_4bit": false, "local_rank": -1, "log_gradients": true, "logging_steps": 1, "lora_alpha": 16, "lora_dropout": 0, "lora_rank": 0, "lr_scheduler": "constant_with_warmup", "lr_warmup_ratio": 0.03, "max_ckpt_mem": 100000000.0, "max_ckpt_num": 3, "max_epochs": 1, "max_len": null, "max_norm": 1.0, "max_samples": -1, "mbpp_max_samples": 974, "micro_reward_batch_size": 8, "micro_rollout_batch_size": 8, "micro_train_batch_size": 8, "mom_reward_target": 1.0, "n_samples_per_prompt": 4, "no_advantage_std_norm": false, "normalize_reward": false, "num_episodes": 1, "output_key": "answer", "overlap_comm": false, "overlong_buffer_len": null, "overlong_penalty_factor": 1, "packing_samples": false, "perf": false, "policy_loss_type": "ppo", "pos_rew_coef": 1.0, "pretrain": "Qwen/Qwen2.5-1.5B", "pretrain_mode": true, "prompt_data": "sjelassi/opencode-instruct_130k", "prompt_data_probs": null, "prompt_max_len": 1024, "prompt_split": "train", "ptx_coef": 0.05, "qa_masking": false, "ref_num_gpus_per_node": 1, "ref_num_nodes": 1, "ref_reward_offload": false, "reinit_critic": false, "remote_rm_url": null, "reward_choice": "gan", "reward_clip_range": [ -10, 10 ], "reward_num_gpus_per_node": 1, "reward_num_nodes": 1, "reward_pretrain": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "rff_factor": 1.0, "rff_features": 1024, "ring_attn_size": 1, "ring_head_stride": 1, "rl_horizon": null, "rl_start": null, "rl_target": 0.0, "rollout_batch_size": 16, "rollout_max_tokens_per_gpu": null, "save_hf_ckpt": true, "save_log_scale_count": -1, "save_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False", "save_steps": 250, "save_value_network": false, "seed": 43, "slurm_job": "None_0", "stride": 2, "target_modules": "all-linear", "temperature": 1.0, "top_p": 1.0, "train_batch_size": 64, "train_max_tokens_per_gpu": 16192, "use_ds_universal_ckpt": false, "use_dynamic_batch": false, "use_kl_loss": true, "use_liger_kernel": false, "use_ms": false, "use_rff_kernel": true, "use_spectral_norm": false, "use_tensorboard": null, "use_wandb": "629a07f37adb439bb40b4f10d84afe378a0a30ca", "use_whitening": false, "use_whitening_critic": false, "value_clip": 0.5, "value_head_prefix": "score", "vllm_enable_sleep": false, "vllm_generate_batch_size": 16, "vllm_gpu_memory_utilization": 0.95, "vllm_num_engines": null, "vllm_sync_backend": "nccl", "vllm_sync_with_ray": false, "vllm_tensor_parallel_size": 1, "wandb_group": null, "wandb_org": null, "wandb_project": "openrlhf_carles_runs", "wandb_run_name": "ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False", "zero_stage": 2, "zpg": 1 }, "client_states": { "data_loader_state_dict": "", "episode": 0, "global_step": 250 }, "cwd": "/data/ebm_openrlhf", "hostname": "ebm11-0-worker-0", "tag": "global_step250", "timestamp": "2026-01-09T10:40:50.833634+00:00" }