jaygala24 committed on
Commit
45de45f
·
verified ·
1 Parent(s): 14c524c

Add training_config.yaml

Browse files
Files changed (1) hide show
  1. training_config.yaml +287 -0
training_config.yaml ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ finetune:
2
+ data: null
3
+ model_class: causal-language-modeling
4
+ config_name: ${..model_path}
5
+ optim: adamw_torch
6
+ load_as_bf16: true
7
+ fp32_lm_head: ${..fp32_lm_head}
8
+ fp32_layer_prefix: ${..fp32_layer_prefix}
9
+ use_flash_attention: true
10
+ attn_implementation: flash_attention_2
11
+ auto_device_map: false
12
+ lora:
13
+ enabled: false
14
+ task_type: CAUSAL_LM
15
+ base_model_8bit: false
16
+ base_model_4bit: false
17
+ r: 16
18
+ alpha: 16
19
+ dropout: 0.05
20
+ bias: none
21
+ target_modules: []
22
+ force_restart: ${..force_restart}
23
+ resume_dataloader: false
24
+ train_batch_size: 2
25
+ valid_batch_size: 4
26
+ weight_decay: 0.01
27
+ learning_rate: 1.0e-06
28
+ gradient_clipping_threshold: 0.3
29
+ lr_scheduler_type: cosine
30
+ num_warmup_steps: 25
31
+ gradient_accumulation_passes: 128
32
+ gradient_checkpointing: true
33
+ reentrant_checkpointing: false
34
+ max_train_steps: 1500
35
+ interrupt_train_steps: -1
36
+ max_eval_steps: -1
37
+ seq_length: 8192
38
+ seq_packing: true
39
+ output_dir: ${..output_dir}/finetune
40
+ seed: ${..seed}
41
+ save_checkpoint_steps: 100
42
+ keep_intermediate_checkpoints: true
43
+ trust_remote_code: false
44
+ cuda_empty_cache: true
45
+ sft_config_name: null
46
+ n_examples: 0
47
+ log_each_n_steps: 1
48
+ also_save_steps: []
49
+ use_safetensors: true
50
+ save_final_training_state: true
51
+ seq_parallel: 1
52
+ objective: rl
53
+ input: training_data
54
+ send_weight_updates: true
55
+ queue_size: 32
56
+ max_lag: null
57
+ weight_update_interval: 1
58
+ pop_old_data: ${..pop_old_data}
59
+ attempts: 8
60
+ eval_callback:
61
+ _target_: pipelinerl.finetune.utils.dummy_eval_callback
62
+ config_name: ''
63
+ rl:
64
+ policy_loss: reinforce
65
+ divide_advantage_by_std: false
66
+ kl_coef: 0.0
67
+ final_kl_coef: 0.0
68
+ entropy_bonus: 0.0
69
+ reward_minus_kl_coef: 0.0
70
+ epsilon_low: 0.02
71
+ epsilon_high: 0.02
72
+ use_advantages: true
73
+ relu_log_p_weights: false
74
+ clamp_log_ratio_ref_new_value: 5
75
+ temperature: ${...llm.parameters.temperature}
76
+ aggregate_loss: sum
77
+ overlong_filtering: false
78
+ adv_estimator: rloo
79
+ filter_zero_advantage_groups: false
80
+ rewards:
81
+ correct_answer_finished: 1.0
82
+ correct_answer_not_finished: 1.0
83
+ wrong_answer_finished: 0
84
+ wrong_answer_not_finished: 0
85
+ no_answer_finished: 0
86
+ no_answer_not_finished: 0
87
+ unparsable_finished: 0
88
+ unparsable_not_finished: 0
89
+ streams:
90
+ backend: files
91
+ seed: 42
92
+ fp32_lm_head: false
93
+ fp32_layer_prefix: lm_head
94
+ actor:
95
+ log_each_n_secs: 0
96
+ llm_max_rollouts: 256
97
+ rollout_workers: 1
98
+ discount_factor: 1
99
+ problem_queue_size: 256
100
+ result_queue_size: 256
101
+ throughput_window_size: 50
102
+ shared_memory_entry_size: 10000000
103
+ rollout_policy: pipelinerl.domains.math.generate_math_rollout
104
+ system_prompt: Please reason step by step, and put your final answer within \boxed{}.
105
+ task_template: '{task}'
106
+ task_prompt: ''
107
+ environment: null
108
+ preprocess:
109
+ input: actor
110
+ output: training_data
111
+ n_workers: 8
112
+ chunk_n_groups: 2
113
+ raw_queue_size: 8
114
+ input_queue_size: 32
115
+ output_queue_size: 32
116
+ dataset_buffer_size: 0
117
+ ring_buffer_size: 128
118
+ max_ready_samples_per_lead: 64
119
+ pop_old_data: ${..pop_old_data}
120
+ shared_memory_entry_size: 100000000
121
+ log_every_n_samples: 128
122
+ llm:
123
+ parameters:
124
+ max_tokens: 4096
125
+ temperature: 1.0
126
+ test_llm:
127
+ parameters:
128
+ max_tokens: 4096
129
+ temperature: 1.0
130
+ top_p: 0.95
131
+ top_k: 50
132
+ vllm_config:
133
+ use_v1: false
134
+ quantization: null
135
+ vllm_kwargs:
136
+ dtype: bfloat16
137
+ gpu-memory-utilization: 0.92
138
+ max-num-seqs: 64
139
+ max-num-batched-tokens: 16384
140
+ enable-chunked-prefill: ''
141
+ return-tokens-as-token-ids: ''
142
+ tensor-parallel-size: 1
143
+ pipeline-parallel-size: 1
144
+ generation-config: vllm
145
+ max_model_len: 8192
146
+ num-scheduler-steps: 8
147
+ disable-log-requests: ''
148
+ disable-frontend-multiprocessing: ''
149
+ world:
150
+ replicas: 1
151
+ actor_fraction: 6
152
+ preprocessor_fraction: 0
153
+ finetune_fraction: 2
154
+ env_replicas: 1
155
+ actor_group_port: 9000
156
+ environment_start_port: 7777
157
+ jobs:
158
+ - kind: actor_llm
159
+ idx: 0
160
+ replica_idx: 0
161
+ local_idx: 0
162
+ node_rank: 0
163
+ hostname: localhost
164
+ port: 8080
165
+ gpus:
166
+ - 0
167
+ url: http://localhost:8080
168
+ environment_key: null
169
+ environment_index: null
170
+ - kind: actor_llm
171
+ idx: 1
172
+ replica_idx: 1
173
+ local_idx: 1
174
+ node_rank: 0
175
+ hostname: localhost
176
+ port: 8081
177
+ gpus:
178
+ - 1
179
+ url: http://localhost:8081
180
+ environment_key: null
181
+ environment_index: null
182
+ - kind: actor_llm
183
+ idx: 2
184
+ replica_idx: 2
185
+ local_idx: 2
186
+ node_rank: 0
187
+ hostname: localhost
188
+ port: 8082
189
+ gpus:
190
+ - 2
191
+ url: http://localhost:8082
192
+ environment_key: null
193
+ environment_index: null
194
+ - kind: actor
195
+ idx: 3
196
+ replica_idx: 0
197
+ local_idx: 0
198
+ node_rank: 0
199
+ hostname: localhost
200
+ port: null
201
+ gpus: []
202
+ url: ''
203
+ environment_key: null
204
+ environment_index: null
205
+ - kind: preprocessor
206
+ idx: 4
207
+ replica_idx: 0
208
+ local_idx: 0
209
+ node_rank: 0
210
+ hostname: localhost
211
+ port: null
212
+ gpus: []
213
+ url: ''
214
+ environment_key: null
215
+ environment_index: null
216
+ - kind: environment
217
+ idx: 5
218
+ replica_idx: 0
219
+ local_idx: 0
220
+ node_rank: 0
221
+ hostname: localhost
222
+ port: 7777
223
+ gpus: []
224
+ url: ''
225
+ environment_key: math
226
+ environment_index: 0
227
+ - kind: finetune
228
+ idx: 6
229
+ replica_idx: 0
230
+ local_idx: 0
231
+ node_rank: 0
232
+ hostname: localhost
233
+ port: null
234
+ gpus:
235
+ - 3
236
+ url: ''
237
+ environment_key: null
238
+ environment_index: null
239
+ eval_every_n_versions: 78000
240
+ model_path: Qwen/Qwen3-4B
241
+ accelerate_config: null
242
+ use_deepspeed: true
243
+ deepspeed_config: deepspeed_stage3_bf16
244
+ use_fsdp: false
245
+ fsdp:
246
+ param_dtype: fp32
247
+ reduce_dtype: fp32
248
+ buffer_dtype: fp32
249
+ output_dir: results/qwen3_4b_rloo_no_kl_3a1f_4xh100_236660
250
+ force_restart: false
251
+ pop_old_data: true
252
+ max_lag: null
253
+ attempts: 16
254
+ train_subset: null
255
+ debug:
256
+ mode: ''
257
+ streams_from: null
258
+ place_inference_workers: true
259
+ use_existing_llms: false
260
+ me:
261
+ job_idx: null
262
+ wandb:
263
+ use_wandb: true
264
+ fail_on_init_error: false
265
+ init_timeout: 120
266
+ wandb_id: null
267
+ wandb_name: null
268
+ wandb_entity_name: jaygala24-team
269
+ wandb_project_name: rl-post-training
270
+ wandb_resume: always
271
+ wandb_use_basename: true
272
+ wandb_workspace_root: results
273
+ wandb_group: qwen3_4b_rloo_no_kl_3a1f_4xh100_236660
274
+ wandb_dir: null
275
+ tags: []
276
+ environments:
277
+ - key: math
278
+ mode: remote
279
+ _target_: pipelinerl.domains.math.MathEnvironment
280
+ environment_key: math
281
+ dataset_loader: pipelinerl.domains.math.load_datasets
282
+ train_dataset_names:
283
+ - gsm8k_train
284
+ - math_train
285
+ test_dataset_names:
286
+ - gsm8k_test
287
+ - math_500