TheBlackCat22 committed on
Commit
405ad4c
·
verified ·
1 Parent(s): 992e045

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/.hydra/config.yaml +85 -0
  3. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/.hydra/hydra.yaml +171 -0
  4. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/.hydra/overrides.yaml +14 -0
  5. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/added_tokens.json +24 -0
  6. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/chat_template.jinja +54 -0
  7. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/config.json +58 -0
  8. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/config_dump.yaml +85 -0
  9. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/events.out.tfevents.1763473963.c317-013.ls6.tacc.utexas.edu.3083110.0 +3 -0
  10. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/generation_config.json +14 -0
  11. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/main.log +0 -0
  12. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/merges.txt +0 -0
  13. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/model.safetensors +3 -0
  14. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/special_tokens_map.json +31 -0
  15. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/test_outputs.jsonl +0 -0
  16. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/test_results.json +27 -0
  17. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/tokenizer.json +3 -0
  18. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/tokenizer_config.json +207 -0
  19. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/training_args.bin +3 -0
  20. AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/vocab.json +0 -0
  21. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/.hydra/config.yaml +85 -0
  22. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/.hydra/hydra.yaml +171 -0
  23. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/.hydra/overrides.yaml +14 -0
  24. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/added_tokens.json +24 -0
  25. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/chat_template.jinja +54 -0
  26. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/config.json +58 -0
  27. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/config_dump.yaml +85 -0
  28. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/events.out.tfevents.1763473040.c317-015.ls6.tacc.utexas.edu.819015.0 +3 -0
  29. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/generation_config.json +14 -0
  30. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/main.log +0 -0
  31. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/merges.txt +0 -0
  32. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/model.safetensors +3 -0
  33. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/special_tokens_map.json +31 -0
  34. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/test_outputs.jsonl +0 -0
  35. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/test_results.json +27 -0
  36. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/tokenizer.json +3 -0
  37. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/tokenizer_config.json +207 -0
  38. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/training_args.bin +3 -0
  39. AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/vocab.json +0 -0
  40. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/.hydra/config.yaml +85 -0
  41. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/.hydra/hydra.yaml +171 -0
  42. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/.hydra/overrides.yaml +14 -0
  43. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/added_tokens.json +24 -0
  44. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/chat_template.jinja +54 -0
  45. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/config.json +58 -0
  46. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/config_dump.yaml +85 -0
  47. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/events.out.tfevents.1763472877.c316-011.ls6.tacc.utexas.edu.1952186.0 +3 -0
  48. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/generation_config.json +14 -0
  49. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/main.log +0 -0
  50. AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/merges.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ GSM8k/Qwen2.5-1.5B-Instruct_balanced_dapo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ GSM8k/Qwen2.5-1.5B-Instruct_cosine_dapo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ GSM8k/Qwen2.5-1.5B-Instruct_gaussian_dapo/tokenizer.json filter=lfs diff=lfs merge=lfs -text
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/.hydra/config.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: train
2
+ experiment:
3
+ dataset_size: 6000
4
+ dataset_seed: 1234
5
+ test_size: 0.1
6
+ hf_token: ${oc.env:HF_TOKEN,null}
7
+ output:
8
+ root_path: ${oc.env:ROOT_PATH}
9
+ run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
10
+ lora:
11
+ r: 32
12
+ alpha: 64
13
+ dropout: 0.1
14
+ target_modules:
15
+ - q_proj
16
+ - v_proj
17
+ task_type: CAUSAL_LM
18
+ occupy_gpu_memory: false
19
+ occupy_gpu_memory_gb: 50
20
+ gpu_device: cuda:0
21
+ model:
22
+ family: Qwen
23
+ trim: Qwen2.5-1.5B-Instruct
24
+ name: ${model.family}/${model.trim}
25
+ trust_remote_code: true
26
+ torch_dtype: bfloat16
27
+ attn_implementation: flash_attention_2
28
+ task:
29
+ name: aqua
30
+ training:
31
+ data_files:
32
+ - datasets/aqua/trivial
33
+ - datasets/aqua/easy
34
+ - datasets/aqua/medium
35
+ - datasets/aqua/hard
36
+ max_prompt_length: 512
37
+ max_completion_length: 512
38
+ inference:
39
+ data_files:
40
+ - datasets/aqua/trivial
41
+ - datasets/aqua/easy
42
+ - datasets/aqua/medium
43
+ - datasets/aqua/hard
44
+ temperature: 0
45
+ 'n': 1
46
+ algorithm:
47
+ name: grpo
48
+ training:
49
+ resume_from_checkpoint: null
50
+ learning_rate: 1.0e-06
51
+ lr_scheduler_type: cosine
52
+ logging_steps: 10
53
+ max_steps: 1600
54
+ per_device_train_batch_size: 16
55
+ generation_batch_size: null
56
+ steps_per_generation: 1
57
+ gradient_accumulation_steps: 4
58
+ gradient_checkpointing: true
59
+ bf16: true
60
+ report_to:
61
+ - tensorboard
62
+ push_to_hub: false
63
+ save_strategy: 'no'
64
+ save_steps: ${algorithm.training.max_steps}
65
+ tf32: true
66
+ num_generations: 8
67
+ beta: 0.001
68
+ use_vllm: true
69
+ vllm_mode: server
70
+ vllm_gpu_memory_utilization: 0.8
71
+ vllm_server_port: 8000
72
+ curriculum: false
73
+ curriculum_schedule: balanced
74
+ scheduler_params:
75
+ mu_exp: 0.5
76
+ sigma: 0.5
77
+ vrex_adds:
78
+ groupdro: 1.0
79
+ gaussian: 0.0
80
+ sec: 0.3
81
+ beta: 1.0
82
+ min_prob: true
83
+ td_alpha: 0.5
84
+ sec_temperature: 0.3
85
+ max_dapo_iter: 4
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/.hydra/hydra.yaml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${output.root_path}/outputs/${mode2name:${mode},${output.run_name},${model.trim}}
4
+ sweep:
5
+ dir: ${output.root_path}/multirun/${now:%Y%m%d}
6
+ subdir: ${hydra.job.override_dirname}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - mode=train
116
+ - model=qwen15
117
+ - task=aqua
118
+ - algorithm=grpo
119
+ - algorithm.training.curriculum_schedule=balanced
120
+ - algorithm.training.scheduler_params.mu_exp=0.5
121
+ - algorithm.training.scheduler_params.sigma=0.5
122
+ - algorithm.training.max_steps=1600
123
+ - algorithm.training.curriculum=false
124
+ - algorithm.training.vllm_gpu_memory_utilization=0.8
125
+ - algorithm.training.vllm_mode=server
126
+ - algorithm.training.report_to=[tensorboard]
127
+ - algorithm.training.push_to_hub=false
128
+ - algorithm.training.save_strategy=no
129
+ job:
130
+ name: main
131
+ chdir: false
132
+ override_dirname: algorithm.training.curriculum=false,algorithm.training.curriculum_schedule=balanced,algorithm.training.max_steps=1600,algorithm.training.push_to_hub=false,algorithm.training.report_to=[tensorboard],algorithm.training.save_strategy=no,algorithm.training.scheduler_params.mu_exp=0.5,algorithm.training.scheduler_params.sigma=0.5,algorithm.training.vllm_gpu_memory_utilization=0.8,algorithm.training.vllm_mode=server,algorithm=grpo,mode=train,model=qwen15,task=aqua
133
+ id: ???
134
+ num: ???
135
+ config_name: config
136
+ env_set: {}
137
+ env_copy: []
138
+ config:
139
+ override_dirname:
140
+ kv_sep: '='
141
+ item_sep: ','
142
+ exclude_keys: []
143
+ runtime:
144
+ version: 1.3.2
145
+ version_base: '1.3'
146
+ cwd: /scratch/10416/sushil22/projects/Sys2Bench_shurui
147
+ config_sources:
148
+ - path: hydra.conf
149
+ schema: pkg
150
+ provider: hydra
151
+ - path: /scratch/10416/sushil22/projects/Sys2Bench_shurui/methods/RL/conf
152
+ schema: file
153
+ provider: main
154
+ - path: ''
155
+ schema: structured
156
+ provider: schema
157
+ output_dir: /scratch/10416/sushil22/projects/Sys2Bench_shurui/outputs/Qwen2.5-1.5B-Instruct_aqua_grpo_balanced_0.5_0.5_SEC0.3DRO1.0G0.0_minpTrue_1600
158
+ choices:
159
+ algorithm: grpo
160
+ task: aqua
161
+ model: qwen15
162
+ hydra/env: default
163
+ hydra/callbacks: null
164
+ hydra/job_logging: default
165
+ hydra/hydra_logging: default
166
+ hydra/hydra_help: default
167
+ hydra/help: default
168
+ hydra/sweeper: basic
169
+ hydra/launcher: basic
170
+ hydra/output: default
171
+ verbose: false
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/.hydra/overrides.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - mode=train
2
+ - model=qwen15
3
+ - task=aqua
4
+ - algorithm=grpo
5
+ - algorithm.training.curriculum_schedule=balanced
6
+ - algorithm.training.scheduler_params.mu_exp=0.5
7
+ - algorithm.training.scheduler_params.sigma=0.5
8
+ - algorithm.training.max_steps=1600
9
+ - algorithm.training.curriculum=false
10
+ - algorithm.training.vllm_gpu_memory_utilization=0.8
11
+ - algorithm.training.vllm_mode=server
12
+ - algorithm.training.report_to=[tensorboard]
13
+ - algorithm.training.push_to_hub=false
14
+ - algorithm.training.save_strategy=no
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1536,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 8960,
12
+ "layer_types": [
13
+ "full_attention",
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention"
41
+ ],
42
+ "max_position_embeddings": 32768,
43
+ "max_window_layers": 21,
44
+ "model_type": "qwen2",
45
+ "num_attention_heads": 12,
46
+ "num_hidden_layers": 28,
47
+ "num_key_value_heads": 2,
48
+ "rms_norm_eps": 1e-06,
49
+ "rope_scaling": null,
50
+ "rope_theta": 1000000.0,
51
+ "sliding_window": null,
52
+ "tie_word_embeddings": true,
53
+ "torch_dtype": "bfloat16",
54
+ "transformers_version": "4.53.1",
55
+ "use_cache": false,
56
+ "use_sliding_window": false,
57
+ "vocab_size": 151936
58
+ }
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/config_dump.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: train
2
+ experiment:
3
+ dataset_size: 6000
4
+ dataset_seed: 1234
5
+ test_size: 0.1
6
+ hf_token: ${oc.env:HF_TOKEN,null}
7
+ output:
8
+ root_path: ${oc.env:ROOT_PATH}
9
+ run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
10
+ lora:
11
+ r: 32
12
+ alpha: 64
13
+ dropout: 0.1
14
+ target_modules:
15
+ - q_proj
16
+ - v_proj
17
+ task_type: CAUSAL_LM
18
+ occupy_gpu_memory: false
19
+ occupy_gpu_memory_gb: 50
20
+ gpu_device: cuda:0
21
+ model:
22
+ family: Qwen
23
+ trim: Qwen2.5-1.5B-Instruct
24
+ name: ${model.family}/${model.trim}
25
+ trust_remote_code: true
26
+ torch_dtype: bfloat16
27
+ attn_implementation: flash_attention_2
28
+ task:
29
+ name: aqua
30
+ training:
31
+ data_files:
32
+ - datasets/aqua/trivial
33
+ - datasets/aqua/easy
34
+ - datasets/aqua/medium
35
+ - datasets/aqua/hard
36
+ max_prompt_length: 512
37
+ max_completion_length: 512
38
+ inference:
39
+ data_files:
40
+ - datasets/aqua/trivial
41
+ - datasets/aqua/easy
42
+ - datasets/aqua/medium
43
+ - datasets/aqua/hard
44
+ temperature: 0
45
+ 'n': 1
46
+ algorithm:
47
+ name: grpo
48
+ training:
49
+ resume_from_checkpoint: null
50
+ learning_rate: 1.0e-06
51
+ lr_scheduler_type: cosine
52
+ logging_steps: 10
53
+ max_steps: 1600
54
+ per_device_train_batch_size: 16
55
+ generation_batch_size: null
56
+ steps_per_generation: 1
57
+ gradient_accumulation_steps: 4
58
+ gradient_checkpointing: true
59
+ bf16: true
60
+ report_to:
61
+ - tensorboard
62
+ push_to_hub: false
63
+ save_strategy: 'no'
64
+ save_steps: ${algorithm.training.max_steps}
65
+ tf32: true
66
+ num_generations: 8
67
+ beta: 0.001
68
+ use_vllm: true
69
+ vllm_mode: server
70
+ vllm_gpu_memory_utilization: 0.8
71
+ vllm_server_port: 8000
72
+ curriculum: false
73
+ curriculum_schedule: balanced
74
+ scheduler_params:
75
+ mu_exp: 0.5
76
+ sigma: 0.5
77
+ vrex_adds:
78
+ groupdro: 1.0
79
+ gaussian: 0.0
80
+ sec: 0.3
81
+ beta: 1.0
82
+ min_prob: true
83
+ td_alpha: 0.5
84
+ sec_temperature: 0.3
85
+ max_dapo_iter: 4
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/events.out.tfevents.1763473963.c317-013.ls6.tacc.utexas.edu.3083110.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce72c3fc0e685e256e990eba01a7a628cbc0c122c8aa7b20b02c221bfc6745d7
3
+ size 240333
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.53.1"
14
+ }
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/main.log ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5eec44635b7f15a25f77cce44319760a6ca0a96bcf2149cf415c820299e7e2f
3
+ size 3087467144
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/test_outputs.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/test_results.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "overall": {
3
+ "avg_reward": 0.6889763779527559,
4
+ "accuracy": 0.65748031496063,
5
+ "support": 254
6
+ },
7
+ "trivial": {
8
+ "avg_reward": 0.9500000000000001,
9
+ "accuracy": 0.9444444444444444,
10
+ "support": 72
11
+ },
12
+ "easy": {
13
+ "avg_reward": 0.75,
14
+ "accuracy": 0.7222222222222222,
15
+ "support": 72
16
+ },
17
+ "medium": {
18
+ "avg_reward": 0.5333333333333333,
19
+ "accuracy": 0.4861111111111111,
20
+ "support": 72
21
+ },
22
+ "hard": {
23
+ "avg_reward": 0.37368421052631573,
24
+ "accuracy": 0.3157894736842105,
25
+ "support": 38
26
+ }
27
+ }
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
3
+ size 11422063
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f7ebef5aa32f22a99a127e32b5781009bdbedaee8d39ba73e484773759c49ce
3
+ size 8056
AQUA/Qwen2.5-1.5B-Instruct_balanced_dapo/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/.hydra/config.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: train
2
+ experiment:
3
+ dataset_size: 6000
4
+ dataset_seed: 1234
5
+ test_size: 0.1
6
+ hf_token: ${oc.env:HF_TOKEN,null}
7
+ output:
8
+ root_path: ${oc.env:ROOT_PATH}
9
+ run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
10
+ lora:
11
+ r: 32
12
+ alpha: 64
13
+ dropout: 0.1
14
+ target_modules:
15
+ - q_proj
16
+ - v_proj
17
+ task_type: CAUSAL_LM
18
+ occupy_gpu_memory: false
19
+ occupy_gpu_memory_gb: 50
20
+ gpu_device: cuda:0
21
+ model:
22
+ family: Qwen
23
+ trim: Qwen2.5-1.5B-Instruct
24
+ name: ${model.family}/${model.trim}
25
+ trust_remote_code: true
26
+ torch_dtype: bfloat16
27
+ attn_implementation: flash_attention_2
28
+ task:
29
+ name: aqua
30
+ training:
31
+ data_files:
32
+ - datasets/aqua/trivial
33
+ - datasets/aqua/easy
34
+ - datasets/aqua/medium
35
+ - datasets/aqua/hard
36
+ max_prompt_length: 512
37
+ max_completion_length: 512
38
+ inference:
39
+ data_files:
40
+ - datasets/aqua/trivial
41
+ - datasets/aqua/easy
42
+ - datasets/aqua/medium
43
+ - datasets/aqua/hard
44
+ temperature: 0
45
+ 'n': 1
46
+ algorithm:
47
+ name: grpo
48
+ training:
49
+ resume_from_checkpoint: null
50
+ learning_rate: 1.0e-06
51
+ lr_scheduler_type: cosine
52
+ logging_steps: 10
53
+ max_steps: 1600
54
+ per_device_train_batch_size: 16
55
+ generation_batch_size: null
56
+ steps_per_generation: 1
57
+ gradient_accumulation_steps: 4
58
+ gradient_checkpointing: true
59
+ bf16: true
60
+ report_to:
61
+ - tensorboard
62
+ push_to_hub: false
63
+ save_strategy: 'no'
64
+ save_steps: ${algorithm.training.max_steps}
65
+ tf32: true
66
+ num_generations: 8
67
+ beta: 0.001
68
+ use_vllm: true
69
+ vllm_mode: server
70
+ vllm_gpu_memory_utilization: 0.8
71
+ vllm_server_port: 8000
72
+ curriculum: false
73
+ curriculum_schedule: cosine
74
+ scheduler_params:
75
+ mu_exp: 0.5
76
+ sigma: 0.5
77
+ vrex_adds:
78
+ groupdro: 1.0
79
+ gaussian: 0.0
80
+ sec: 0.3
81
+ beta: 1.0
82
+ min_prob: true
83
+ td_alpha: 0.5
84
+ sec_temperature: 0.3
85
+ max_dapo_iter: 4
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/.hydra/hydra.yaml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${output.root_path}/outputs/${mode2name:${mode},${output.run_name},${model.trim}}
4
+ sweep:
5
+ dir: ${output.root_path}/multirun/${now:%Y%m%d}
6
+ subdir: ${hydra.job.override_dirname}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - mode=train
116
+ - model=qwen15
117
+ - task=aqua
118
+ - algorithm=grpo
119
+ - algorithm.training.curriculum_schedule=cosine
120
+ - algorithm.training.scheduler_params.mu_exp=0.5
121
+ - algorithm.training.scheduler_params.sigma=0.5
122
+ - algorithm.training.max_steps=1600
123
+ - algorithm.training.curriculum=false
124
+ - algorithm.training.vllm_gpu_memory_utilization=0.8
125
+ - algorithm.training.vllm_mode=server
126
+ - algorithm.training.report_to=[tensorboard]
127
+ - algorithm.training.push_to_hub=false
128
+ - algorithm.training.save_strategy=no
129
+ job:
130
+ name: main
131
+ chdir: false
132
+ override_dirname: algorithm.training.curriculum=false,algorithm.training.curriculum_schedule=cosine,algorithm.training.max_steps=1600,algorithm.training.push_to_hub=false,algorithm.training.report_to=[tensorboard],algorithm.training.save_strategy=no,algorithm.training.scheduler_params.mu_exp=0.5,algorithm.training.scheduler_params.sigma=0.5,algorithm.training.vllm_gpu_memory_utilization=0.8,algorithm.training.vllm_mode=server,algorithm=grpo,mode=train,model=qwen15,task=aqua
133
+ id: ???
134
+ num: ???
135
+ config_name: config
136
+ env_set: {}
137
+ env_copy: []
138
+ config:
139
+ override_dirname:
140
+ kv_sep: '='
141
+ item_sep: ','
142
+ exclude_keys: []
143
+ runtime:
144
+ version: 1.3.2
145
+ version_base: '1.3'
146
+ cwd: /scratch/10416/sushil22/projects/Sys2Bench_shurui
147
+ config_sources:
148
+ - path: hydra.conf
149
+ schema: pkg
150
+ provider: hydra
151
+ - path: /scratch/10416/sushil22/projects/Sys2Bench_shurui/methods/RL/conf
152
+ schema: file
153
+ provider: main
154
+ - path: ''
155
+ schema: structured
156
+ provider: schema
157
+ output_dir: /scratch/10416/sushil22/projects/Sys2Bench_shurui/outputs/Qwen2.5-1.5B-Instruct_aqua_grpo_cosine_0.5_0.5_SEC0.3DRO1.0G0.0_minpTrue_1600
158
+ choices:
159
+ algorithm: grpo
160
+ task: aqua
161
+ model: qwen15
162
+ hydra/env: default
163
+ hydra/callbacks: null
164
+ hydra/job_logging: default
165
+ hydra/hydra_logging: default
166
+ hydra/hydra_help: default
167
+ hydra/help: default
168
+ hydra/sweeper: basic
169
+ hydra/launcher: basic
170
+ hydra/output: default
171
+ verbose: false
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/.hydra/overrides.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - mode=train
2
+ - model=qwen15
3
+ - task=aqua
4
+ - algorithm=grpo
5
+ - algorithm.training.curriculum_schedule=cosine
6
+ - algorithm.training.scheduler_params.mu_exp=0.5
7
+ - algorithm.training.scheduler_params.sigma=0.5
8
+ - algorithm.training.max_steps=1600
9
+ - algorithm.training.curriculum=false
10
+ - algorithm.training.vllm_gpu_memory_utilization=0.8
11
+ - algorithm.training.vllm_mode=server
12
+ - algorithm.training.report_to=[tensorboard]
13
+ - algorithm.training.push_to_hub=false
14
+ - algorithm.training.save_strategy=no
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1536,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 8960,
12
+ "layer_types": [
13
+ "full_attention",
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention"
41
+ ],
42
+ "max_position_embeddings": 32768,
43
+ "max_window_layers": 21,
44
+ "model_type": "qwen2",
45
+ "num_attention_heads": 12,
46
+ "num_hidden_layers": 28,
47
+ "num_key_value_heads": 2,
48
+ "rms_norm_eps": 1e-06,
49
+ "rope_scaling": null,
50
+ "rope_theta": 1000000.0,
51
+ "sliding_window": null,
52
+ "tie_word_embeddings": true,
53
+ "torch_dtype": "bfloat16",
54
+ "transformers_version": "4.53.1",
55
+ "use_cache": false,
56
+ "use_sliding_window": false,
57
+ "vocab_size": 151936
58
+ }
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/config_dump.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: train
2
+ experiment:
3
+ dataset_size: 6000
4
+ dataset_seed: 1234
5
+ test_size: 0.1
6
+ hf_token: ${oc.env:HF_TOKEN,null}
7
+ output:
8
+ root_path: ${oc.env:ROOT_PATH}
9
+ run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
10
+ lora:
11
+ r: 32
12
+ alpha: 64
13
+ dropout: 0.1
14
+ target_modules:
15
+ - q_proj
16
+ - v_proj
17
+ task_type: CAUSAL_LM
18
+ occupy_gpu_memory: false
19
+ occupy_gpu_memory_gb: 50
20
+ gpu_device: cuda:0
21
+ model:
22
+ family: Qwen
23
+ trim: Qwen2.5-1.5B-Instruct
24
+ name: ${model.family}/${model.trim}
25
+ trust_remote_code: true
26
+ torch_dtype: bfloat16
27
+ attn_implementation: flash_attention_2
28
+ task:
29
+ name: aqua
30
+ training:
31
+ data_files:
32
+ - datasets/aqua/trivial
33
+ - datasets/aqua/easy
34
+ - datasets/aqua/medium
35
+ - datasets/aqua/hard
36
+ max_prompt_length: 512
37
+ max_completion_length: 512
38
+ inference:
39
+ data_files:
40
+ - datasets/aqua/trivial
41
+ - datasets/aqua/easy
42
+ - datasets/aqua/medium
43
+ - datasets/aqua/hard
44
+ temperature: 0
45
+ 'n': 1
46
+ algorithm:
47
+ name: grpo
48
+ training:
49
+ resume_from_checkpoint: null
50
+ learning_rate: 1.0e-06
51
+ lr_scheduler_type: cosine
52
+ logging_steps: 10
53
+ max_steps: 1600
54
+ per_device_train_batch_size: 16
55
+ generation_batch_size: null
56
+ steps_per_generation: 1
57
+ gradient_accumulation_steps: 4
58
+ gradient_checkpointing: true
59
+ bf16: true
60
+ report_to:
61
+ - tensorboard
62
+ push_to_hub: false
63
+ save_strategy: 'no'
64
+ save_steps: ${algorithm.training.max_steps}
65
+ tf32: true
66
+ num_generations: 8
67
+ beta: 0.001
68
+ use_vllm: true
69
+ vllm_mode: server
70
+ vllm_gpu_memory_utilization: 0.8
71
+ vllm_server_port: 8000
72
+ curriculum: false
73
+ curriculum_schedule: cosine
74
+ scheduler_params:
75
+ mu_exp: 0.5
76
+ sigma: 0.5
77
+ vrex_adds:
78
+ groupdro: 1.0
79
+ gaussian: 0.0
80
+ sec: 0.3
81
+ beta: 1.0
82
+ min_prob: true
83
+ td_alpha: 0.5
84
+ sec_temperature: 0.3
85
+ max_dapo_iter: 4
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/events.out.tfevents.1763473040.c317-015.ls6.tacc.utexas.edu.819015.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ababb120bf709b17124d5b0dad3e1f4b160699f3deab95fadca314a128bdaca9
3
+ size 240325
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.53.1"
14
+ }
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/main.log ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec25a67197d1ba53ea579e5c0305dc975425bc8444e422693f560af433855f52
3
+ size 3087467144
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/test_outputs.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/test_results.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "overall": {
3
+ "avg_reward": 0.7240157480314959,
4
+ "accuracy": 0.6968503937007874,
5
+ "support": 254
6
+ },
7
+ "trivial": {
8
+ "avg_reward": 0.9625000000000001,
9
+ "accuracy": 0.9583333333333334,
10
+ "support": 72
11
+ },
12
+ "easy": {
13
+ "avg_reward": 0.7722222222222221,
14
+ "accuracy": 0.75,
15
+ "support": 72
16
+ },
17
+ "medium": {
18
+ "avg_reward": 0.5944444444444446,
19
+ "accuracy": 0.5555555555555556,
20
+ "support": 72
21
+ },
22
+ "hard": {
23
+ "avg_reward": 0.42631578947368426,
24
+ "accuracy": 0.3684210526315789,
25
+ "support": 38
26
+ }
27
+ }
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
3
+ size 11422063
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d31421b25b5a50b5ba38fb66528f5df3e5fe731f5b6720b48f37d57418df96
3
+ size 8056
AQUA/Qwen2.5-1.5B-Instruct_cosine_dapo/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/.hydra/config.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: train
2
+ experiment:
3
+ dataset_size: 6000
4
+ dataset_seed: 1234
5
+ test_size: 0.1
6
+ hf_token: ${oc.env:HF_TOKEN,null}
7
+ output:
8
+ root_path: ${oc.env:ROOT_PATH}
9
+ run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
10
+ lora:
11
+ r: 32
12
+ alpha: 64
13
+ dropout: 0.1
14
+ target_modules:
15
+ - q_proj
16
+ - v_proj
17
+ task_type: CAUSAL_LM
18
+ occupy_gpu_memory: false
19
+ occupy_gpu_memory_gb: 50
20
+ gpu_device: cuda:0
21
+ model:
22
+ family: Qwen
23
+ trim: Qwen2.5-1.5B-Instruct
24
+ name: ${model.family}/${model.trim}
25
+ trust_remote_code: true
26
+ torch_dtype: bfloat16
27
+ attn_implementation: flash_attention_2
28
+ task:
29
+ name: aqua
30
+ training:
31
+ data_files:
32
+ - datasets/aqua/trivial
33
+ - datasets/aqua/easy
34
+ - datasets/aqua/medium
35
+ - datasets/aqua/hard
36
+ max_prompt_length: 512
37
+ max_completion_length: 512
38
+ inference:
39
+ data_files:
40
+ - datasets/aqua/trivial
41
+ - datasets/aqua/easy
42
+ - datasets/aqua/medium
43
+ - datasets/aqua/hard
44
+ temperature: 0
45
+ 'n': 1
46
+ algorithm:
47
+ name: grpo
48
+ training:
49
+ resume_from_checkpoint: null
50
+ learning_rate: 1.0e-06
51
+ lr_scheduler_type: cosine
52
+ logging_steps: 10
53
+ max_steps: 1600
54
+ per_device_train_batch_size: 16
55
+ generation_batch_size: null
56
+ steps_per_generation: 1
57
+ gradient_accumulation_steps: 4
58
+ gradient_checkpointing: true
59
+ bf16: true
60
+ report_to:
61
+ - tensorboard
62
+ push_to_hub: false
63
+ save_strategy: 'no'
64
+ save_steps: ${algorithm.training.max_steps}
65
+ tf32: true
66
+ num_generations: 8
67
+ beta: 0.001
68
+ use_vllm: true
69
+ vllm_mode: server
70
+ vllm_gpu_memory_utilization: 0.8
71
+ vllm_server_port: 8000
72
+ curriculum: false
73
+ curriculum_schedule: gaussian
74
+ scheduler_params:
75
+ mu_exp: 0.5
76
+ sigma: 0.5
77
+ vrex_adds:
78
+ groupdro: 1.0
79
+ gaussian: 0.0
80
+ sec: 0.3
81
+ beta: 1.0
82
+ min_prob: true
83
+ td_alpha: 0.5
84
+ sec_temperature: 0.3
85
+ max_dapo_iter: 4
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/.hydra/hydra.yaml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${output.root_path}/outputs/${mode2name:${mode},${output.run_name},${model.trim}}
4
+ sweep:
5
+ dir: ${output.root_path}/multirun/${now:%Y%m%d}
6
+ subdir: ${hydra.job.override_dirname}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - mode=train
116
+ - model=qwen15
117
+ - task=aqua
118
+ - algorithm=grpo
119
+ - algorithm.training.curriculum_schedule=gaussian
120
+ - algorithm.training.scheduler_params.mu_exp=0.5
121
+ - algorithm.training.scheduler_params.sigma=0.5
122
+ - algorithm.training.max_steps=1600
123
+ - algorithm.training.curriculum=false
124
+ - algorithm.training.vllm_gpu_memory_utilization=0.8
125
+ - algorithm.training.vllm_mode=server
126
+ - algorithm.training.report_to=[tensorboard]
127
+ - algorithm.training.push_to_hub=false
128
+ - algorithm.training.save_strategy=no
129
+ job:
130
+ name: main
131
+ chdir: false
132
+ override_dirname: algorithm.training.curriculum=false,algorithm.training.curriculum_schedule=gaussian,algorithm.training.max_steps=1600,algorithm.training.push_to_hub=false,algorithm.training.report_to=[tensorboard],algorithm.training.save_strategy=no,algorithm.training.scheduler_params.mu_exp=0.5,algorithm.training.scheduler_params.sigma=0.5,algorithm.training.vllm_gpu_memory_utilization=0.8,algorithm.training.vllm_mode=server,algorithm=grpo,mode=train,model=qwen15,task=aqua
133
+ id: ???
134
+ num: ???
135
+ config_name: config
136
+ env_set: {}
137
+ env_copy: []
138
+ config:
139
+ override_dirname:
140
+ kv_sep: '='
141
+ item_sep: ','
142
+ exclude_keys: []
143
+ runtime:
144
+ version: 1.3.2
145
+ version_base: '1.3'
146
+ cwd: /scratch/10416/sushil22/projects/Sys2Bench_shurui
147
+ config_sources:
148
+ - path: hydra.conf
149
+ schema: pkg
150
+ provider: hydra
151
+ - path: /scratch/10416/sushil22/projects/Sys2Bench_shurui/methods/RL/conf
152
+ schema: file
153
+ provider: main
154
+ - path: ''
155
+ schema: structured
156
+ provider: schema
157
+ output_dir: /scratch/10416/sushil22/projects/Sys2Bench_shurui/outputs/Qwen2.5-1.5B-Instruct_aqua_grpo_gaussian_0.5_0.5_SEC0.3DRO1.0G0.0_minpTrue_1600
158
+ choices:
159
+ algorithm: grpo
160
+ task: aqua
161
+ model: qwen15
162
+ hydra/env: default
163
+ hydra/callbacks: null
164
+ hydra/job_logging: default
165
+ hydra/hydra_logging: default
166
+ hydra/hydra_help: default
167
+ hydra/help: default
168
+ hydra/sweeper: basic
169
+ hydra/launcher: basic
170
+ hydra/output: default
171
+ verbose: false
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/.hydra/overrides.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - mode=train
2
+ - model=qwen15
3
+ - task=aqua
4
+ - algorithm=grpo
5
+ - algorithm.training.curriculum_schedule=gaussian
6
+ - algorithm.training.scheduler_params.mu_exp=0.5
7
+ - algorithm.training.scheduler_params.sigma=0.5
8
+ - algorithm.training.max_steps=1600
9
+ - algorithm.training.curriculum=false
10
+ - algorithm.training.vllm_gpu_memory_utilization=0.8
11
+ - algorithm.training.vllm_mode=server
12
+ - algorithm.training.report_to=[tensorboard]
13
+ - algorithm.training.push_to_hub=false
14
+ - algorithm.training.save_strategy=no
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 1536,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 8960,
12
+ "layer_types": [
13
+ "full_attention",
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention"
41
+ ],
42
+ "max_position_embeddings": 32768,
43
+ "max_window_layers": 21,
44
+ "model_type": "qwen2",
45
+ "num_attention_heads": 12,
46
+ "num_hidden_layers": 28,
47
+ "num_key_value_heads": 2,
48
+ "rms_norm_eps": 1e-06,
49
+ "rope_scaling": null,
50
+ "rope_theta": 1000000.0,
51
+ "sliding_window": null,
52
+ "tie_word_embeddings": true,
53
+ "torch_dtype": "bfloat16",
54
+ "transformers_version": "4.53.1",
55
+ "use_cache": false,
56
+ "use_sliding_window": false,
57
+ "vocab_size": 151936
58
+ }
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/config_dump.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mode: train
2
+ experiment:
3
+ dataset_size: 6000
4
+ dataset_seed: 1234
5
+ test_size: 0.1
6
+ hf_token: ${oc.env:HF_TOKEN,null}
7
+ output:
8
+ root_path: ${oc.env:ROOT_PATH}
9
+ run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
10
+ lora:
11
+ r: 32
12
+ alpha: 64
13
+ dropout: 0.1
14
+ target_modules:
15
+ - q_proj
16
+ - v_proj
17
+ task_type: CAUSAL_LM
18
+ occupy_gpu_memory: false
19
+ occupy_gpu_memory_gb: 50
20
+ gpu_device: cuda:0
21
+ model:
22
+ family: Qwen
23
+ trim: Qwen2.5-1.5B-Instruct
24
+ name: ${model.family}/${model.trim}
25
+ trust_remote_code: true
26
+ torch_dtype: bfloat16
27
+ attn_implementation: flash_attention_2
28
+ task:
29
+ name: aqua
30
+ training:
31
+ data_files:
32
+ - datasets/aqua/trivial
33
+ - datasets/aqua/easy
34
+ - datasets/aqua/medium
35
+ - datasets/aqua/hard
36
+ max_prompt_length: 512
37
+ max_completion_length: 512
38
+ inference:
39
+ data_files:
40
+ - datasets/aqua/trivial
41
+ - datasets/aqua/easy
42
+ - datasets/aqua/medium
43
+ - datasets/aqua/hard
44
+ temperature: 0
45
+ 'n': 1
46
+ algorithm:
47
+ name: grpo
48
+ training:
49
+ resume_from_checkpoint: null
50
+ learning_rate: 1.0e-06
51
+ lr_scheduler_type: cosine
52
+ logging_steps: 10
53
+ max_steps: 1600
54
+ per_device_train_batch_size: 16
55
+ generation_batch_size: null
56
+ steps_per_generation: 1
57
+ gradient_accumulation_steps: 4
58
+ gradient_checkpointing: true
59
+ bf16: true
60
+ report_to:
61
+ - tensorboard
62
+ push_to_hub: false
63
+ save_strategy: 'no'
64
+ save_steps: ${algorithm.training.max_steps}
65
+ tf32: true
66
+ num_generations: 8
67
+ beta: 0.001
68
+ use_vllm: true
69
+ vllm_mode: server
70
+ vllm_gpu_memory_utilization: 0.8
71
+ vllm_server_port: 8000
72
+ curriculum: false
73
+ curriculum_schedule: gaussian
74
+ scheduler_params:
75
+ mu_exp: 0.5
76
+ sigma: 0.5
77
+ vrex_adds:
78
+ groupdro: 1.0
79
+ gaussian: 0.0
80
+ sec: 0.3
81
+ beta: 1.0
82
+ min_prob: true
83
+ td_alpha: 0.5
84
+ sec_temperature: 0.3
85
+ max_dapo_iter: 4
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/events.out.tfevents.1763472877.c316-011.ls6.tacc.utexas.edu.1952186.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5fbbc93a5cafde598633aba3331d14dd72555de9d0805cecdf9c1f658af1aeb
3
+ size 240333
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.53.1"
14
+ }
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/main.log ADDED
The diff for this file is too large to render. See raw diff
 
AQUA/Qwen2.5-1.5B-Instruct_gaussian_dapo/merges.txt ADDED
The diff for this file is too large to render. See raw diff