cdomingoenrich committed on
Commit
399e182
·
verified ·
1 Parent(s): 2efbddc

Upload model checkpoint (+run_config.json)

Browse files
Files changed (1) hide show
  1. run_config.json +252 -0
run_config.json ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "args": {
3
+ "actor_freeze": 0,
4
+ "actor_init_on_gpu": false,
5
+ "actor_learning_rate": 1e-05,
6
+ "actor_lr_warmup_ratio": null,
7
+ "actor_num_gpus_per_node": 1,
8
+ "actor_num_nodes": 1,
9
+ "actor_train_period": null,
10
+ "adam_betas": [
11
+ 0.9,
12
+ 0.95
13
+ ],
14
+ "adam_offload": true,
15
+ "advantage_estimator": "rloo",
16
+ "agent_func_path": null,
17
+ "apply_chat_template": false,
18
+ "async_train": false,
19
+ "aux_loss_coef": 0,
20
+ "bf16": true,
21
+ "bigcodebench_max_samples": 1140,
22
+ "bleu_n_gram": 1,
23
+ "ce_horizon": null,
24
+ "ckpt_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False/ckpt",
25
+ "classifier_sequence_selection": "closest",
26
+ "colocate_actor_ref": false,
27
+ "colocate_all_models": true,
28
+ "colocate_critic_reward": false,
29
+ "colocate_reward_models": false,
30
+ "context_max_len": 2,
31
+ "critic_backbone_freeze": 0,
32
+ "critic_bradley_terry_loss_coef": 0.0,
33
+ "critic_ce_loss_coef": 0.0,
34
+ "critic_class_loss_coef": 0.0,
35
+ "critic_embedding_infonce_loss_coef": 0.0,
36
+ "critic_infonce_loss_coef": 1.0,
37
+ "critic_learning_rate": 0.0,
38
+ "critic_loss_choice": "log",
39
+ "critic_lr_head": 0.0,
40
+ "critic_lr_scheduler": "constant_with_warmup",
41
+ "critic_lr_warmup_ratio": null,
42
+ "critic_mom_loss_coef": 0.0,
43
+ "critic_mom_sequence_level": "last_token",
44
+ "critic_num_gpus_per_node": 1,
45
+ "critic_num_nodes": 1,
46
+ "critic_pretrain": "Qwen/Qwen2.5-1.5B",
47
+ "critic_reward_horizon": null,
48
+ "critic_reward_start": null,
49
+ "critic_reward_target": 0.0,
50
+ "critic_sequence_level": "last_token",
51
+ "critic_train_interval": 1,
52
+ "critic_train_period": null,
53
+ "debug": false,
54
+ "deepcompile": false,
55
+ "deepspeed_enable_sleep": false,
56
+ "disable_ds_ckpt": true,
57
+ "disable_fast_tokenizer": false,
58
+ "document_masking": false,
59
+ "ds_tensor_parallel_size": 1,
60
+ "dual_clip": null,
61
+ "dynamic_filtering": false,
62
+ "dynamic_filtering_reward_range": [
63
+ 0,
64
+ 1
65
+ ],
66
+ "ema_beta": 0.9,
67
+ "embed_method": "last_token",
68
+ "embedding_infonce_hard_negative_k": 0,
69
+ "embedding_infonce_ignore_sim_threshold": null,
70
+ "embedding_infonce_ignore_top_k": 1,
71
+ "embedding_infonce_min_negatives": 0,
72
+ "embedding_infonce_mismatched_real_k": 0,
73
+ "embedding_infonce_temperature": 0.1,
74
+ "enable_ema": true,
75
+ "enable_prefix_caching": false,
76
+ "enforce_eager": false,
77
+ "entropy_loss_coef": null,
78
+ "eps_clip": 0.2,
79
+ "eps_clip_low_high": [
80
+ 0.2,
81
+ 0.2
82
+ ],
83
+ "eval_batch_size": 16,
84
+ "eval_bertscore_batch_size": 16,
85
+ "eval_bertscore_model": "roberta-base",
86
+ "eval_dataset": "sjelassi/opencode-instruct_130k",
87
+ "eval_detox_device": "",
88
+ "eval_detox_fl_model": "cointegrated/roberta-large-cola-krishna2020",
89
+ "eval_detox_sim_model": "sentence-transformers/LaBSE",
90
+ "eval_detox_sta_model": "s-nlp/roberta_toxicity_classifier",
91
+ "eval_down_batch_size": 128,
92
+ "eval_down_max_samples": 128,
93
+ "eval_down_steps": 50,
94
+ "eval_factuality_batch_size": 16,
95
+ "eval_factuality_device": "cuda",
96
+ "eval_factuality_entailment_threshold": 0.5,
97
+ "eval_factuality_max_length": 512,
98
+ "eval_factuality_max_sentences": null,
99
+ "eval_factuality_metric": "none",
100
+ "eval_factuality_model": "roberta-large-mnli",
101
+ "eval_factuality_truncation": "only_first",
102
+ "eval_generate_max_len": 512,
103
+ "eval_max_samples": 128,
104
+ "eval_mt_batch_size": 1,
105
+ "eval_mt_max_samples": 8,
106
+ "eval_mt_steps": -1,
107
+ "eval_n_samples_per_prompt": 4,
108
+ "eval_n_samples_per_prompt_down": 4,
109
+ "eval_n_samples_per_prompt_mt": 100,
110
+ "eval_split": "test",
111
+ "eval_steps": -1,
112
+ "eval_style_transfer_log_samples": 0,
113
+ "eval_summarization_log_samples": 0,
114
+ "eval_temperature": 1.0,
115
+ "eval_temperature_down": 1.0,
116
+ "eval_temperature_mt": 1.0,
117
+ "eval_translation_log_samples": 0,
118
+ "flash_attn": false,
119
+ "freezing_actor_steps": -1,
120
+ "full_determinism": false,
121
+ "gamma": 1,
122
+ "generate_max_len": 2,
123
+ "grad_accum_dtype": null,
124
+ "gradient_checkpointing": true,
125
+ "gradient_checkpointing_use_reentrant": false,
126
+ "hidden_state_method": "concat",
127
+ "humaneval_max_samples": 164,
128
+ "init_ce_coef": 1.0,
129
+ "init_kl_coef": 0.0,
130
+ "input_key": "question",
131
+ "input_template": null,
132
+ "keep_critic_on": false,
133
+ "kl_estimator": "k2",
134
+ "kl_horizon": null,
135
+ "kl_target": null,
136
+ "l2": 0.0,
137
+ "label_key": "answer",
138
+ "lambd": 1,
139
+ "load_actor_checkpoint": false,
140
+ "load_critic_checkpoint": false,
141
+ "load_in_4bit": false,
142
+ "local_rank": -1,
143
+ "log_gradients": true,
144
+ "logging_steps": 1,
145
+ "lora_alpha": 16,
146
+ "lora_dropout": 0,
147
+ "lora_rank": 0,
148
+ "lr_scheduler": "constant_with_warmup",
149
+ "lr_warmup_ratio": 0.03,
150
+ "max_ckpt_mem": 100000000.0,
151
+ "max_ckpt_num": 3,
152
+ "max_epochs": 1,
153
+ "max_len": null,
154
+ "max_norm": 1.0,
155
+ "max_samples": -1,
156
+ "mbpp_max_samples": 974,
157
+ "micro_reward_batch_size": 8,
158
+ "micro_rollout_batch_size": 8,
159
+ "micro_train_batch_size": 8,
160
+ "mom_reward_target": 1.0,
161
+ "n_samples_per_prompt": 4,
162
+ "no_advantage_std_norm": false,
163
+ "normalize_reward": false,
164
+ "num_episodes": 1,
165
+ "output_key": "answer",
166
+ "overlap_comm": false,
167
+ "overlong_buffer_len": null,
168
+ "overlong_penalty_factor": 1,
169
+ "packing_samples": false,
170
+ "perf": false,
171
+ "policy_loss_type": "ppo",
172
+ "pos_rew_coef": 1.0,
173
+ "pretrain": "Qwen/Qwen2.5-1.5B",
174
+ "pretrain_mode": true,
175
+ "prompt_data": "sjelassi/opencode-instruct_130k",
176
+ "prompt_data_probs": null,
177
+ "prompt_max_len": 1024,
178
+ "prompt_split": "train",
179
+ "ptx_coef": 0.05,
180
+ "qa_masking": false,
181
+ "ref_num_gpus_per_node": 1,
182
+ "ref_num_nodes": 1,
183
+ "ref_reward_offload": false,
184
+ "reinit_critic": false,
185
+ "remote_rm_url": null,
186
+ "reward_choice": "gan",
187
+ "reward_clip_range": [
188
+ -10,
189
+ 10
190
+ ],
191
+ "reward_num_gpus_per_node": 1,
192
+ "reward_num_nodes": 1,
193
+ "reward_pretrain": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
194
+ "rff_factor": 1.0,
195
+ "rff_features": 1024,
196
+ "ring_attn_size": 1,
197
+ "ring_head_stride": 1,
198
+ "rl_horizon": null,
199
+ "rl_start": null,
200
+ "rl_target": 0.0,
201
+ "rollout_batch_size": 16,
202
+ "rollout_max_tokens_per_gpu": null,
203
+ "save_hf_ckpt": true,
204
+ "save_log_scale_count": -1,
205
+ "save_path": "./ed_checkpoints/ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False",
206
+ "save_steps": 250,
207
+ "save_value_network": false,
208
+ "seed": 43,
209
+ "slurm_job": "None_0",
210
+ "stride": 2,
211
+ "target_modules": "all-linear",
212
+ "temperature": 1.0,
213
+ "top_p": 1.0,
214
+ "train_batch_size": 64,
215
+ "train_max_tokens_per_gpu": 16192,
216
+ "use_ds_universal_ckpt": false,
217
+ "use_dynamic_batch": false,
218
+ "use_kl_loss": true,
219
+ "use_liger_kernel": false,
220
+ "use_ms": false,
221
+ "use_rff_kernel": true,
222
+ "use_spectral_norm": false,
223
+ "use_tensorboard": null,
224
+ "use_wandb": "<redacted>",
225
+ "use_whitening": false,
226
+ "use_whitening_critic": false,
227
+ "value_clip": 0.5,
228
+ "value_head_prefix": "score",
229
+ "vllm_enable_sleep": false,
230
+ "vllm_generate_batch_size": 16,
231
+ "vllm_gpu_memory_utilization": 0.95,
232
+ "vllm_num_engines": null,
233
+ "vllm_sync_backend": "nccl",
234
+ "vllm_sync_with_ray": false,
235
+ "vllm_tensor_parallel_size": 1,
236
+ "wandb_group": null,
237
+ "wandb_org": null,
238
+ "wandb_project": "openrlhf_carles_runs",
239
+ "wandb_run_name": "ed_sweep_a_freeze_0_a_lr_1e-05_ctx_2_c_bb_0_c_lr_0.0_c_lr_head_0.0_cpt_qwen15_dm_False_ed_code130k_freezing_actor_steps_-1_gen_2_ce_1.0_pt_qwen15_pd_code130k_qm_False_rt_0.0_str_2_wh_False",
240
+ "zero_stage": 2,
241
+ "zpg": 1
242
+ },
243
+ "client_states": {
244
+ "data_loader_state_dict": "<omitted>",
245
+ "episode": 0,
246
+ "global_step": 250
247
+ },
248
+ "cwd": "/data/ebm_openrlhf",
249
+ "hostname": "ebm11-0-worker-0",
250
+ "tag": "global_step250",
251
+ "timestamp": "2026-01-09T10:40:50.833634+00:00"
252
+ }