ibrahima2222 committed on
Commit
cc59065
·
verified ·
1 Parent(s): ba79dba

Upload rl/meta_000499.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. rl/meta_000499.json +82 -0
rl/meta_000499.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "sequence_len": 2048,
4
+ "vocab_size": 65536,
5
+ "n_layer": 32,
6
+ "n_head": 16,
7
+ "n_kv_head": 16,
8
+ "n_embd": 2048,
9
+ "intermediate_size": null,
10
+ "mlp_type": "relu2",
11
+ "rope_theta": 10000.0,
12
+ "attention_bias": false,
13
+ "moe_num_experts": 0,
14
+ "moe_top_k": 1,
15
+ "moe_layer_start": 0,
16
+ "moe_layer_end": -1,
17
+ "moe_layer_stride": 1,
18
+ "moe_capacity_factor": 1.25,
19
+ "moe_aux_loss_coef": 0.01
20
+ },
21
+ "user_config": {
22
+ "run": "r1_grpo_grpo",
23
+ "source": "sft",
24
+ "ref_source": "sft",
25
+ "dtype": "float32",
26
+ "device_batch_size": 1,
27
+ "examples_per_step": 16,
28
+ "num_samples": 4,
29
+ "ppo_minibatch_size": 64,
30
+ "max_prompt_tokens": 1024,
31
+ "max_new_tokens": 2048,
32
+ "temperature": 1.0,
33
+ "top_k": 50,
34
+ "kl_coef": 0.0,
35
+ "kl_max_threshold": 50.0,
36
+ "reward_scale": 1.0,
37
+ "reward_mode": "dapo",
38
+ "group_dynamic_sampling": 0,
39
+ "group_dynamic_sampling_max_tries": 50,
40
+ "use_best_of_n": 0,
41
+ "active_sampling": 1,
42
+ "zero_grad_filtering": 1,
43
+ "zero_adv_eps": 1e-08,
44
+ "format_hint_mode": "eval",
45
+ "learning_rate": 1e-06,
46
+ "lr_schedule": "constant",
47
+ "temp_start": 1.0,
48
+ "temp_end": 1.0,
49
+ "temp_schedule": "none",
50
+ "length_penalty_mode": "dapo",
51
+ "length_penalty_coef": 1.0,
52
+ "length_penalty_target": 2048,
53
+ "length_penalty_floor": 0.0,
54
+ "clip_eps": 0.2,
55
+ "clip_ratio_low": 0.8,
56
+ "clip_ratio_high": 1.28,
57
+ "advantage_clip": 5.0,
58
+ "grpo_epochs": 4,
59
+ "grpo_lr_scale": 1.0,
60
+ "num_steps": 500,
61
+ "total_examples": -1,
62
+ "save_every": 100,
63
+ "eval_every": 50,
64
+ "eval_num_per_task": 5,
65
+ "eval_seed": 123,
66
+ "eval_temperature": 0.0,
67
+ "eval_top_k": 0,
68
+ "eval_max_new_tokens": 256,
69
+ "use_deepspeed": 0,
70
+ "deepspeed_config": "slurm/deepspeed_zero3.json",
71
+ "use_fsdp": 0,
72
+ "fsdp_min_num_params": 1000000,
73
+ "fsdp_cpu_offload": 0,
74
+ "task_mix": "dolci:1.0,gsm8k:0.45,math:0.20,mmlu_science:0.10,mbpp:0.25",
75
+ "dolci_dataset_id": "allenai/Dolci-Think-RL-32B",
76
+ "dolci_split": "train",
77
+ "dolci_mode": "cot",
78
+ "dolci_stop": -1,
79
+ "dolci_streaming": 0,
80
+ "dolci_stream_cache": ""
81
+ }
82
+ }