yuccaaa committed
Commit 9627ce0 · verified · 1 Parent(s): 31ec239

Add files using upload-large-folder tool
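The upload-large-folder tool named in the commit message is the resumable large-upload utility from huggingface_hub. As a minimal sketch of the equivalent Python call (the repo id and local folder below are hypothetical placeholders, not values taken from this commit):

# Sketch: resumable upload of a large local folder, as a series of commits.
from huggingface_hub import HfApi

api = HfApi()
api.upload_large_folder(
    repo_id="yuccaaa/example-repo",   # hypothetical; the target repo id is not shown in this view
    folder_path="./local_checkpoints",  # hypothetical local path
    repo_type="model",
)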

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/args.json +364 -0
  2. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/config.json +29 -0
  3. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/generation_config.json +14 -0
  4. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/merges.txt +0 -0
  5. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/model.safetensors.index.json +346 -0
  6. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/special_tokens_map.json +31 -0
  7. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/tokenizer_config.json +208 -0
  8. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/vocab.json +0 -0
  9. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/README.md +202 -0
  10. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/adapter_config.json +39 -0
  11. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/additional_config.json +1 -0
  12. BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/args.json +364 -0
  13. EasyR1-new/.gitignore +181 -0
  14. EasyR1-new/.pre-commit-config.yaml +22 -0
  15. EasyR1-new/Dockerfile +65 -0
  16. EasyR1-new/Dockerfile.legacy +72 -0
  17. EasyR1-new/LICENSE +201 -0
  18. EasyR1-new/Makefile +24 -0
  19. EasyR1-new/README.md +223 -0
  20. EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/media/table/val/generations_0_2feffd203d182aadef48.table.json +1 -0
  21. EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/requirements.txt +216 -0
  22. EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/wandb-metadata.json +91 -0
  23. EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/wandb-summary.json +1 -0
  24. EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/logs/debug-internal.log +16 -0
  25. EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/logs/debug.log +28 -0
  26. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/config.yaml +230 -0
  27. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/media/table/val/generations_0_2feffd203d182aadef48.table.json +1 -0
  28. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/media/table/val/generations_104_ccf2e4d7f5a8bd440fdc.table.json +0 -0
  29. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/media/table/val/generations_109_dfde4027f4468ecb4bdb.table.json +0 -0
  30. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/output.log +0 -0
  31. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/requirements.txt +216 -0
  32. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/wandb-metadata.json +91 -0
  33. EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/wandb-summary.json +1 -0
  34. EasyR1/verl.egg-info/PKG-INFO +264 -0
  35. EasyR1/verl.egg-info/SOURCES.txt +71 -0
  36. EasyR1/verl.egg-info/dependency_links.txt +1 -0
  37. EasyR1/verl.egg-info/requires.txt +23 -0
  38. EasyR1/verl.egg-info/top_level.txt +1 -0
  39. EasyR1/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc +0 -0
  40. EasyR1/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc +0 -0
  41. EasyR1/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc +0 -0
  42. EasyR1/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc +0 -0
  43. EasyR1/verl/workers/sharding_manager/fsdp_ulysses.py +65 -0
  44. EasyR1/verl/workers/sharding_manager/fsdp_vllm.py +149 -0
  45. LAVIS-main/lavis/models/alpro_models/alpro_retrieval.py +422 -0
  46. LAVIS-main/lavis/models/beats/BEATs.py +180 -0
  47. LAVIS-main/lavis/models/beats/LICENSE_BEATs.txt +21 -0
  48. LAVIS-main/lavis/models/beats/README.md +127 -0
  49. LAVIS-main/lavis/models/beats/Tokenizers.py +173 -0
  50. LAVIS-main/lavis/models/beats/backbone.py +783 -0
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/args.json ADDED
@@ -0,0 +1,364 @@
+{
+  "model": "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
+  "model_type": "qwen2_5",
+  "model_revision": null,
+  "task_type": "causal_lm",
+  "torch_dtype": "bfloat16",
+  "attn_impl": null,
+  "num_labels": null,
+  "problem_type": null,
+  "rope_scaling": null,
+  "device_map": null,
+  "max_memory": {},
+  "local_repo_path": null,
+  "template": "qwen2_5",
+  "system": null,
+  "max_length": 8192,
+  "truncation_strategy": "delete",
+  "max_pixels": null,
+  "tools_prompt": "react_en",
+  "norm_bbox": null,
+  "response_prefix": null,
+  "padding_side": "right",
+  "loss_scale": "default",
+  "sequence_parallel_size": 1,
+  "use_chat_template": true,
+  "template_backend": "swift",
+  "dataset": [
+    "/nas/shared/kilab/wangyujia/material_production_train.jsonl"
+  ],
+  "val_dataset": [],
+  "split_dataset_ratio": 0.01,
+  "data_seed": 42,
+  "dataset_num_proc": 128,
+  "dataset_shuffle": true,
+  "val_dataset_shuffle": false,
+  "streaming": false,
+  "interleave_prob": null,
+  "stopping_strategy": "first_exhausted",
+  "shuffle_buffer_size": 1000,
+  "enable_cache": false,
+  "download_mode": "reuse_dataset_if_exists",
+  "columns": {},
+  "strict": false,
+  "remove_unused_columns": true,
+  "model_name": [
+    "qwen_bio_sft_deeplocbinary-08022035"
+  ],
+  "model_author": [
+    "swift"
+  ],
+  "custom_dataset_info": [],
+  "quant_method": null,
+  "quant_bits": null,
+  "hqq_axis": null,
+  "bnb_4bit_compute_dtype": "bfloat16",
+  "bnb_4bit_quant_type": "nf4",
+  "bnb_4bit_use_double_quant": true,
+  "bnb_4bit_quant_storage": null,
+  "max_new_tokens": 64,
+  "temperature": 0.0,
+  "top_k": null,
+  "top_p": null,
+  "repetition_penalty": null,
+  "num_beams": 1,
+  "stream": false,
+  "stop_words": [],
+  "logprobs": false,
+  "top_logprobs": null,
+  "ckpt_dir": "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
+  "load_dataset_config": null,
+  "lora_modules": [],
+  "tuner_backend": "peft",
+  "train_type": "lora",
+  "adapters": [],
+  "external_plugins": [],
+  "seed": 42,
+  "model_kwargs": {},
+  "load_args": false,
+  "load_data_args": false,
+  "use_hf": false,
+  "hub_token": null,
+  "custom_register_path": [],
+  "ignore_args_error": false,
+  "use_swift_lora": false,
+  "output_dir": "/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250",
+  "overwrite_output_dir": false,
+  "do_train": false,
+  "do_eval": false,
+  "do_predict": false,
+  "eval_strategy": "steps",
+  "prediction_loss_only": false,
+  "per_device_train_batch_size": 2,
+  "per_device_eval_batch_size": 2,
+  "per_gpu_train_batch_size": null,
+  "per_gpu_eval_batch_size": null,
+  "gradient_accumulation_steps": 4,
+  "eval_accumulation_steps": null,
+  "eval_delay": 0,
+  "torch_empty_cache_steps": null,
+  "learning_rate": 1e-05,
+  "weight_decay": 0.1,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.95,
+  "adam_epsilon": 1e-08,
+  "max_grad_norm": 1.0,
+  "num_train_epochs": 3.0,
+  "max_steps": -1,
+  "lr_scheduler_type": "cosine",
+  "lr_scheduler_kwargs": null,
+  "warmup_ratio": 0.05,
+  "warmup_steps": 0,
+  "log_level": "passive",
+  "log_level_replica": "warning",
+  "log_on_each_node": true,
+  "logging_dir": "/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250/runs",
+  "logging_strategy": "steps",
+  "logging_first_step": true,
+  "logging_steps": 1,
+  "logging_nan_inf_filter": true,
+  "save_strategy": "steps",
+  "save_steps": 5.0,
+  "save_total_limit": 5,
+  "save_safetensors": true,
+  "save_on_each_node": false,
+  "save_only_model": true,
+  "restore_callback_states_from_checkpoint": false,
+  "no_cuda": false,
+  "use_cpu": false,
+  "use_mps_device": false,
+  "jit_mode_eval": false,
+  "use_ipex": false,
+  "bf16": true,
+  "fp16": false,
+  "fp16_opt_level": "O1",
+  "half_precision_backend": "auto",
+  "bf16_full_eval": false,
+  "fp16_full_eval": false,
+  "tf32": null,
+  "local_rank": 0,
+  "ddp_backend": null,
+  "tpu_num_cores": null,
+  "tpu_metrics_debug": false,
+  "debug": null,
+  "dataloader_drop_last": false,
+  "eval_steps": 5.0,
+  "dataloader_num_workers": 1,
+  "dataloader_prefetch_factor": null,
+  "past_index": -1,
+  "run_name": "construct",
+  "disable_tqdm": null,
+  "label_names": null,
+  "load_best_model_at_end": false,
+  "metric_for_best_model": "loss",
+  "greater_is_better": false,
+  "ignore_data_skip": false,
+  "fsdp": "",
+  "fsdp_min_num_params": 0,
+  "fsdp_config": null,
+  "tp_size": 0,
+  "fsdp_transformer_layer_cls_to_wrap": null,
+  "accelerator_config": {
+    "dispatch_batches": false
+  },
+  "deepspeed": {
+    "fp16": {
+      "enabled": "auto",
+      "loss_scale": 0,
+      "loss_scale_window": 1000,
+      "initial_scale_power": 16,
+      "hysteresis": 2,
+      "min_loss_scale": 1
+    },
+    "bf16": {
+      "enabled": "auto"
+    },
+    "zero_optimization": {
+      "stage": 3,
+      "offload_optimizer": {
+        "device": "none",
+        "pin_memory": true
+      },
+      "offload_param": {
+        "device": "none",
+        "pin_memory": true
+      },
+      "overlap_comm": false,
+      "contiguous_gradients": true,
+      "sub_group_size": 1000000000.0,
+      "reduce_bucket_size": "auto",
+      "zero_quantized_weights": false,
+      "zero_quantized_gradients": false,
+      "stage3_prefetch_bucket_size": "auto",
+      "stage3_param_persistence_threshold": "auto",
+      "stage3_max_live_parameters": 1000000000.0,
+      "stage3_max_reuse_distance": 1000000000.0,
+      "stage3_gather_16bit_weights_on_model_save": true
+    },
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "steps_per_print": 2000,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+  },
+  "label_smoothing_factor": 0.0,
+  "optim": "adamw_torch",
+  "optim_args": null,
+  "adafactor": false,
+  "group_by_length": false,
+  "length_column_name": "length",
+  "report_to": [
+    "tensorboard"
+  ],
+  "ddp_find_unused_parameters": null,
+  "ddp_bucket_cap_mb": null,
+  "ddp_broadcast_buffers": null,
+  "dataloader_pin_memory": true,
+  "dataloader_persistent_workers": false,
+  "skip_memory_metrics": true,
+  "use_legacy_prediction_loop": false,
+  "push_to_hub": false,
+  "resume_from_checkpoint": null,
+  "hub_model_id": null,
+  "hub_strategy": "every_save",
+  "hub_private_repo": null,
+  "hub_always_push": false,
+  "gradient_checkpointing": true,
+  "gradient_checkpointing_kwargs": null,
+  "include_inputs_for_metrics": false,
+  "include_for_metrics": [],
+  "eval_do_concat_batches": true,
+  "fp16_backend": "auto",
+  "push_to_hub_model_id": null,
+  "push_to_hub_organization": null,
+  "push_to_hub_token": null,
+  "mp_parameters": "",
+  "auto_find_batch_size": false,
+  "full_determinism": false,
+  "torchdynamo": null,
+  "ray_scope": "last",
+  "ddp_timeout": 1800,
+  "torch_compile": false,
+  "torch_compile_backend": null,
+  "torch_compile_mode": null,
+  "include_tokens_per_second": false,
+  "include_num_input_tokens_seen": false,
+  "neftune_noise_alpha": null,
+  "optim_target_modules": null,
+  "batch_eval_metrics": false,
+  "eval_on_start": false,
+  "use_liger_kernel": false,
+  "eval_use_gather_object": false,
+  "average_tokens_across_devices": false,
+  "sortish_sampler": false,
+  "predict_with_generate": false,
+  "generation_max_length": null,
+  "generation_num_beams": null,
+  "generation_config": null,
+  "check_model": true,
+  "acc_strategy": "token",
+  "train_dataloader_shuffle": true,
+  "metric_warmup_step": 0,
+  "fsdp_num": 1,
+  "acc_steps": 1,
+  "eval_use_evalscope": false,
+  "eval_datasets": [],
+  "eval_limit": null,
+  "eval_datasets_args": null,
+  "eval_generation_config": null,
+  "freeze_parameters": [],
+  "freeze_parameters_ratio": 0.0,
+  "trainable_parameters": [],
+  "freeze_llm": false,
+  "freeze_vit": true,
+  "freeze_aligner": true,
+  "target_modules": [
+    "all-linear"
+  ],
+  "target_regex": null,
+  "modules_to_save": [],
+  "lora_rank": 8,
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "lora_bias": "none",
+  "lora_dtype": null,
+  "lorap_lr_ratio": null,
+  "use_rslora": false,
+  "use_dora": false,
+  "lora_ga_batch_size": 2,
+  "lora_ga_iters": 2,
+  "lora_ga_max_length": 1024,
+  "lora_ga_direction": "ArB2r",
+  "lora_ga_scale": "stable",
+  "lora_ga_stable_gamma": 16,
+  "init_weights": true,
+  "fourier_n_frequency": 2000,
+  "fourier_scaling": 300.0,
+  "boft_block_size": 4,
+  "boft_block_num": 0,
+  "boft_n_butterfly_factor": 1,
+  "boft_dropout": 0.0,
+  "vera_rank": 256,
+  "vera_projection_prng_key": 0,
+  "vera_dropout": 0.0,
+  "vera_d_initial": 0.1,
+  "adapter_act": "gelu",
+  "adapter_length": 128,
+  "use_galore": false,
+  "galore_target_modules": null,
+  "galore_rank": 128,
+  "galore_update_proj_gap": 50,
+  "galore_scale": 1.0,
+  "galore_proj_type": "std",
+  "galore_optim_per_parameter": false,
+  "galore_with_embedding": false,
+  "galore_quantization": false,
+  "galore_proj_quant": false,
+  "galore_proj_bits": 4,
+  "galore_proj_group_size": 256,
+  "galore_cos_threshold": 0.4,
+  "galore_gamma_proj": 2,
+  "galore_queue_size": 5,
+  "adalora_target_r": 8,
+  "adalora_init_r": 12,
+  "adalora_tinit": 0,
+  "adalora_tfinal": 0,
+  "adalora_deltaT": 1,
+  "adalora_beta1": 0.85,
+  "adalora_beta2": 0.85,
+  "adalora_orth_reg_weight": 0.5,
+  "llamapro_num_new_blocks": 4,
+  "llamapro_num_groups": null,
+  "lisa_activated_layers": 0,
+  "lisa_step_interval": 20,
+  "reft_layer_key": null,
+  "reft_layers": null,
+  "reft_rank": 4,
+  "reft_intervention_type": "LoreftIntervention",
+  "reft_args": null,
+  "swanlab_token": null,
+  "swanlab_project": null,
+  "swanlab_workspace": null,
+  "swanlab_exp_name": null,
+  "swanlab_mode": "cloud",
+  "add_version": true,
+  "resume_only_model": false,
+  "create_checkpoint_symlink": false,
+  "packing": false,
+  "lazy_tokenize": false,
+  "loss_type": null,
+  "optimizer": null,
+  "metric": null,
+  "zero_hpz_partition_size": null,
+  "rank": 0,
+  "global_world_size": 8,
+  "local_world_size": 8,
+  "model_suffix": "checkpoint-50-merged",
+  "model_info": "ModelInfo(model_type='qwen2_5', model_dir='/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged', torch_dtype=torch.bfloat16, max_model_len=32768, quant_method=None, quant_bits=None, rope_scaling=None, config=None, task_type='causal_lm', num_labels=None)",
+  "model_meta": "ModelMeta(model_type='qwen2_5', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct', hf_model_id='Qwen/Qwen2.5-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct', hf_model_id='Qwen/Qwen2.5-7B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct', hf_model_id='Qwen/Qwen2.5-14B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct', hf_model_id='Qwen/Qwen2.5-32B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct', hf_model_id='Qwen/Qwen2.5-72B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B', hf_model_id='Qwen/Qwen2.5-0.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B', hf_model_id='Qwen/Qwen2.5-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B', hf_model_id='Qwen/Qwen2.5-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B', hf_model_id='Qwen/Qwen2.5-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B', hf_model_id='Qwen/Qwen2.5-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B', hf_model_id='Qwen/Qwen2.5-32B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B', hf_model_id='Qwen/Qwen2.5-72B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-3B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-7B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-14B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-32B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-72B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B', hf_model_id='Qwen/Qwen2.5-Coder-0.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B', hf_model_id='Qwen/Qwen2.5-Coder-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B', hf_model_id='Qwen/Qwen2.5-Coder-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B', hf_model_id='Qwen/Qwen2.5-Coder-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B', hf_model_id='Qwen/Qwen2.5-Coder-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B', hf_model_id='Qwen/Qwen2.5-Coder-32B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=['coding'])], template='qwen2_5', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f22ea5f5e10>, model_arch='llama', architectures=['Qwen2ForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=['transformers>=4.37'], tags=[])",
+  "model_dir": "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
+  "hub": "<class 'swift.hub.hub.MSHub'>",
+  "evaluation_strategy": "steps",
+  "training_args": "Seq2SeqTrainingArguments(output_dir='/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=2, per_device_eval_batch_size=2, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=4, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=1e-05, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=5, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=True, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=5, dataloader_num_workers=1, dataloader_prefetch_factor=10, past_index=-1, run_name='/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, tp_size=0, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': False, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'zero_quantized_weights': False, 'zero_quantized_gradients': False, 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, check_model=True, acc_strategy='token', train_dataloader_shuffle=True, metric_warmup_step=0, fsdp_num=1, acc_steps=1, eval_use_evalscope=False, eval_datasets=[], eval_limit=None, eval_datasets_args=None, eval_generation_config=None, train_type='lora', optimizer=None, local_repo_path=None, galore_config=None)"
+}
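The hyperparameters above imply an effective global batch size of per_device_train_batch_size x gradient_accumulation_steps x global_world_size = 2 x 4 x 8 = 64 sequences per optimizer step. A minimal stdlib sketch that recomputes this from the dump (the file path is a placeholder):

# Sketch: derive the effective global batch size from an ms-swift args.json dump.
import json

with open("checkpoint-1029-merged/args.json") as f:  # placeholder path
    args = json.load(f)

effective_batch = (
    args["per_device_train_batch_size"]    # 2
    * args["gradient_accumulation_steps"]  # 4
    * args["global_world_size"]            # 8 ranks
)
print(effective_batch)  # 64 for this run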
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/config.json ADDED
@@ -0,0 +1,29 @@
+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "pad_token_id": 151643,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}
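Because the merged checkpoint advertises the stock Qwen2ForCausalLM architecture in bfloat16, it should load with plain transformers (the model metadata above requires transformers>=4.37; 4.51.3 wrote these files). A minimal sketch, with the checkpoint directory as a placeholder local path:

# Sketch: load the merged LoRA checkpoint as a regular Qwen2 causal LM.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
    device_map="auto",           # requires accelerate; drop for single-device CPU/GPU loading
)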
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/generation_config.json ADDED
@@ -0,0 +1,14 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.51.3"
+}
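from_pretrained picks up generation_config.json automatically, so model.generate() already samples with temperature 0.7, top_p 0.8, top_k 20, and repetition penalty 1.05; passing them explicitly, as in this sketch continuing from the loading example above, is equivalent (the prompt is hypothetical):

# Sketch: generate with the sampling parameters recorded in generation_config.json.
messages = [{"role": "user", "content": "Where is this protein localized?"}]  # hypothetical prompt
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

out = model.generate(
    inputs,
    max_new_tokens=64,    # matches "max_new_tokens" in args.json
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.05,
)
print(tokenizer.decode(out[0, inputs.shape[1]:], skip_special_tokens=True))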
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/model.safetensors.index.json ADDED
@@ -0,0 +1,346 @@
+{
+  "metadata": {
+    "total_size": 15231233024
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00004-of-00004.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
316
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
317
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
318
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
319
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
320
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
321
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
322
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
323
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
324
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
325
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
326
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
327
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
328
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
329
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
330
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
331
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
332
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
333
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
334
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
335
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
336
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
337
+ "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
338
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
339
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
340
+ "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
341
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
342
+ "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
343
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
344
+ "model.norm.weight": "model-00003-of-00004.safetensors"
345
+ }
346
+ }
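A note on consuming this index: `model.safetensors.index.json` maps every tensor name to the shard that stores it, so a layer may legitimately straddle two shards (e.g. layer 8's attention projections in shard 1 while its MLP and layernorms sit in shard 2 above). A minimal sketch of loading the merged checkpoint — the path mirrors this commit's directory layout, and `device_map="auto"` assumes `accelerate` is installed:

```python
# Minimal sketch: load the merged, sharded checkpoint recorded above.
# transformers reads model.safetensors.index.json and fetches each tensor
# from whichever shard this index points it to.
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged"
model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    torch_dtype="bfloat16",   # matches the training dtype recorded in args.json
    device_map="auto",        # assumes accelerate is available
)
tokenizer = AutoTokenizer.from_pretrained(ckpt)
```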
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
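The map above pins the chat-turn terminator and the padding token; note that `pad_token` (`<|endoftext|>`) is deliberately distinct from `eos_token` (`<|im_end|>`), so padded batches are not confused with end-of-turn. A quick sanity check, reusing `ckpt` from the previous sketch:

```python
# Sketch: verify the special-token setup recorded in special_tokens_map.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(ckpt)  # ckpt as defined in the previous snippet
assert tok.eos_token == "<|im_end|>"       # chat-turn terminator
assert tok.pad_token == "<|endoftext|>"    # padding token, distinct from EOS
print(tok.convert_tokens_to_ids(["<|im_start|>", "<|im_end|>"]))  # [151644, 151645]
```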
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|endoftext|>",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
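The `chat_template` above is the standard Qwen2.5 Jinja template: it injects the default system prompt when none is supplied, wraps each turn in `<|im_start|>…<|im_end|>`, and serializes tool calls into `<tool_call>` blocks. A sketch of rendering a prompt with it, reusing `tok` from the previous sketch:

```python
# Sketch: render a prompt with the chat_template from tokenizer_config.json.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# -> "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
#    "<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant\n"
```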
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-1029-merged/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: /oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.15.2
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/adapter_config.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [],
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "v_proj",
28
+ "down_proj",
29
+ "k_proj",
30
+ "up_proj",
31
+ "gate_proj",
32
+ "q_proj",
33
+ "o_proj"
34
+ ],
35
+ "task_type": "CAUSAL_LM",
36
+ "trainable_token_indices": null,
37
+ "use_dora": false,
38
+ "use_rslora": false
39
+ }
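Unlike the merged checkpoint-1029 above, checkpoint-835 is an unmerged LoRA adapter (r=8, alpha=32, dropout 0.05 on all seven linear projections). A sketch of attaching and merging it — the base-model path is the one recorded in `base_model_name_or_path`, and the adapter path follows this commit's layout:

```python
# Sketch: attach the checkpoint-835 LoRA adapter to its base model, then merge.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
    torch_dtype="bfloat16",
)
model = PeftModel.from_pretrained(
    base, "BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835"
)
model = model.merge_and_unload()  # fold the LoRA deltas into the base weights
```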
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/additional_config.json ADDED
@@ -0,0 +1 @@
1
+ {"lora_dtype": null, "lorap_lr_ratio": null, "lorap_emb_lr": 1e-06}
BIO/sft/qwen-production-08022302/v0-20250802-230250/checkpoint-835/args.json ADDED
@@ -0,0 +1,364 @@
1
+ {
2
+ "model": "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
3
+ "model_type": "qwen2_5",
4
+ "model_revision": null,
5
+ "task_type": "causal_lm",
6
+ "torch_dtype": "bfloat16",
7
+ "attn_impl": null,
8
+ "num_labels": null,
9
+ "problem_type": null,
10
+ "rope_scaling": null,
11
+ "device_map": null,
12
+ "max_memory": {},
13
+ "local_repo_path": null,
14
+ "template": "qwen2_5",
15
+ "system": null,
16
+ "max_length": 8192,
17
+ "truncation_strategy": "delete",
18
+ "max_pixels": null,
19
+ "tools_prompt": "react_en",
20
+ "norm_bbox": null,
21
+ "response_prefix": null,
22
+ "padding_side": "right",
23
+ "loss_scale": "default",
24
+ "sequence_parallel_size": 1,
25
+ "use_chat_template": true,
26
+ "template_backend": "swift",
27
+ "dataset": [
28
+ "/nas/shared/kilab/wangyujia/material_production_train.jsonl"
29
+ ],
30
+ "val_dataset": [],
31
+ "split_dataset_ratio": 0.01,
32
+ "data_seed": 42,
33
+ "dataset_num_proc": 128,
34
+ "dataset_shuffle": true,
35
+ "val_dataset_shuffle": false,
36
+ "streaming": false,
37
+ "interleave_prob": null,
38
+ "stopping_strategy": "first_exhausted",
39
+ "shuffle_buffer_size": 1000,
40
+ "enable_cache": false,
41
+ "download_mode": "reuse_dataset_if_exists",
42
+ "columns": {},
43
+ "strict": false,
44
+ "remove_unused_columns": true,
45
+ "model_name": [
46
+ "qwen_bio_sft_deeplocbinary-08022035"
47
+ ],
48
+ "model_author": [
49
+ "swift"
50
+ ],
51
+ "custom_dataset_info": [],
52
+ "quant_method": null,
53
+ "quant_bits": null,
54
+ "hqq_axis": null,
55
+ "bnb_4bit_compute_dtype": "bfloat16",
56
+ "bnb_4bit_quant_type": "nf4",
57
+ "bnb_4bit_use_double_quant": true,
58
+ "bnb_4bit_quant_storage": null,
59
+ "max_new_tokens": 64,
60
+ "temperature": 0.0,
61
+ "top_k": null,
62
+ "top_p": null,
63
+ "repetition_penalty": null,
64
+ "num_beams": 1,
65
+ "stream": false,
66
+ "stop_words": [],
67
+ "logprobs": false,
68
+ "top_logprobs": null,
69
+ "ckpt_dir": "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
70
+ "load_dataset_config": null,
71
+ "lora_modules": [],
72
+ "tuner_backend": "peft",
73
+ "train_type": "lora",
74
+ "adapters": [],
75
+ "external_plugins": [],
76
+ "seed": 42,
77
+ "model_kwargs": {},
78
+ "load_args": false,
79
+ "load_data_args": false,
80
+ "use_hf": false,
81
+ "hub_token": null,
82
+ "custom_register_path": [],
83
+ "ignore_args_error": false,
84
+ "use_swift_lora": false,
85
+ "output_dir": "/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250",
86
+ "overwrite_output_dir": false,
87
+ "do_train": false,
88
+ "do_eval": false,
89
+ "do_predict": false,
90
+ "eval_strategy": "steps",
91
+ "prediction_loss_only": false,
92
+ "per_device_train_batch_size": 2,
93
+ "per_device_eval_batch_size": 2,
94
+ "per_gpu_train_batch_size": null,
95
+ "per_gpu_eval_batch_size": null,
96
+ "gradient_accumulation_steps": 4,
97
+ "eval_accumulation_steps": null,
98
+ "eval_delay": 0,
99
+ "torch_empty_cache_steps": null,
100
+ "learning_rate": 1e-05,
101
+ "weight_decay": 0.1,
102
+ "adam_beta1": 0.9,
103
+ "adam_beta2": 0.95,
104
+ "adam_epsilon": 1e-08,
105
+ "max_grad_norm": 1.0,
106
+ "num_train_epochs": 3.0,
107
+ "max_steps": -1,
108
+ "lr_scheduler_type": "cosine",
109
+ "lr_scheduler_kwargs": null,
110
+ "warmup_ratio": 0.05,
111
+ "warmup_steps": 0,
112
+ "log_level": "passive",
113
+ "log_level_replica": "warning",
114
+ "log_on_each_node": true,
115
+ "logging_dir": "/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250/runs",
116
+ "logging_strategy": "steps",
117
+ "logging_first_step": true,
118
+ "logging_steps": 1,
119
+ "logging_nan_inf_filter": true,
120
+ "save_strategy": "steps",
121
+ "save_steps": 5.0,
122
+ "save_total_limit": 5,
123
+ "save_safetensors": true,
124
+ "save_on_each_node": false,
125
+ "save_only_model": true,
126
+ "restore_callback_states_from_checkpoint": false,
127
+ "no_cuda": false,
128
+ "use_cpu": false,
129
+ "use_mps_device": false,
130
+ "jit_mode_eval": false,
131
+ "use_ipex": false,
132
+ "bf16": true,
133
+ "fp16": false,
134
+ "fp16_opt_level": "O1",
135
+ "half_precision_backend": "auto",
136
+ "bf16_full_eval": false,
137
+ "fp16_full_eval": false,
138
+ "tf32": null,
139
+ "local_rank": 0,
140
+ "ddp_backend": null,
141
+ "tpu_num_cores": null,
142
+ "tpu_metrics_debug": false,
143
+ "debug": null,
144
+ "dataloader_drop_last": false,
145
+ "eval_steps": 5.0,
146
+ "dataloader_num_workers": 1,
147
+ "dataloader_prefetch_factor": null,
148
+ "past_index": -1,
149
+ "run_name": "construct",
150
+ "disable_tqdm": null,
151
+ "label_names": null,
152
+ "load_best_model_at_end": false,
153
+ "metric_for_best_model": "loss",
154
+ "greater_is_better": false,
155
+ "ignore_data_skip": false,
156
+ "fsdp": "",
157
+ "fsdp_min_num_params": 0,
158
+ "fsdp_config": null,
159
+ "tp_size": 0,
160
+ "fsdp_transformer_layer_cls_to_wrap": null,
161
+ "accelerator_config": {
162
+ "dispatch_batches": false
163
+ },
164
+ "deepspeed": {
165
+ "fp16": {
166
+ "enabled": "auto",
167
+ "loss_scale": 0,
168
+ "loss_scale_window": 1000,
169
+ "initial_scale_power": 16,
170
+ "hysteresis": 2,
171
+ "min_loss_scale": 1
172
+ },
173
+ "bf16": {
174
+ "enabled": "auto"
175
+ },
176
+ "zero_optimization": {
177
+ "stage": 3,
178
+ "offload_optimizer": {
179
+ "device": "none",
180
+ "pin_memory": true
181
+ },
182
+ "offload_param": {
183
+ "device": "none",
184
+ "pin_memory": true
185
+ },
186
+ "overlap_comm": false,
187
+ "contiguous_gradients": true,
188
+ "sub_group_size": 1000000000.0,
189
+ "reduce_bucket_size": "auto",
190
+ "zero_quantized_weights": false,
191
+ "zero_quantized_gradients": false,
192
+ "stage3_prefetch_bucket_size": "auto",
193
+ "stage3_param_persistence_threshold": "auto",
194
+ "stage3_max_live_parameters": 1000000000.0,
195
+ "stage3_max_reuse_distance": 1000000000.0,
196
+ "stage3_gather_16bit_weights_on_model_save": true
197
+ },
198
+ "gradient_accumulation_steps": "auto",
199
+ "gradient_clipping": "auto",
200
+ "steps_per_print": 2000,
201
+ "train_batch_size": "auto",
202
+ "train_micro_batch_size_per_gpu": "auto",
203
+ "wall_clock_breakdown": false
204
+ },
205
+ "label_smoothing_factor": 0.0,
206
+ "optim": "adamw_torch",
207
+ "optim_args": null,
208
+ "adafactor": false,
209
+ "group_by_length": false,
210
+ "length_column_name": "length",
211
+ "report_to": [
212
+ "tensorboard"
213
+ ],
214
+ "ddp_find_unused_parameters": null,
215
+ "ddp_bucket_cap_mb": null,
216
+ "ddp_broadcast_buffers": null,
217
+ "dataloader_pin_memory": true,
218
+ "dataloader_persistent_workers": false,
219
+ "skip_memory_metrics": true,
220
+ "use_legacy_prediction_loop": false,
221
+ "push_to_hub": false,
222
+ "resume_from_checkpoint": null,
223
+ "hub_model_id": null,
224
+ "hub_strategy": "every_save",
225
+ "hub_private_repo": null,
226
+ "hub_always_push": false,
227
+ "gradient_checkpointing": true,
228
+ "gradient_checkpointing_kwargs": null,
229
+ "include_inputs_for_metrics": false,
230
+ "include_for_metrics": [],
231
+ "eval_do_concat_batches": true,
232
+ "fp16_backend": "auto",
233
+ "push_to_hub_model_id": null,
234
+ "push_to_hub_organization": null,
235
+ "push_to_hub_token": null,
236
+ "mp_parameters": "",
237
+ "auto_find_batch_size": false,
238
+ "full_determinism": false,
239
+ "torchdynamo": null,
240
+ "ray_scope": "last",
241
+ "ddp_timeout": 1800,
242
+ "torch_compile": false,
243
+ "torch_compile_backend": null,
244
+ "torch_compile_mode": null,
245
+ "include_tokens_per_second": false,
246
+ "include_num_input_tokens_seen": false,
247
+ "neftune_noise_alpha": null,
248
+ "optim_target_modules": null,
249
+ "batch_eval_metrics": false,
250
+ "eval_on_start": false,
251
+ "use_liger_kernel": false,
252
+ "eval_use_gather_object": false,
253
+ "average_tokens_across_devices": false,
254
+ "sortish_sampler": false,
255
+ "predict_with_generate": false,
256
+ "generation_max_length": null,
257
+ "generation_num_beams": null,
258
+ "generation_config": null,
259
+ "check_model": true,
260
+ "acc_strategy": "token",
261
+ "train_dataloader_shuffle": true,
262
+ "metric_warmup_step": 0,
263
+ "fsdp_num": 1,
264
+ "acc_steps": 1,
265
+ "eval_use_evalscope": false,
266
+ "eval_datasets": [],
267
+ "eval_limit": null,
268
+ "eval_datasets_args": null,
269
+ "eval_generation_config": null,
270
+ "freeze_parameters": [],
271
+ "freeze_parameters_ratio": 0.0,
272
+ "trainable_parameters": [],
273
+ "freeze_llm": false,
274
+ "freeze_vit": true,
275
+ "freeze_aligner": true,
276
+ "target_modules": [
277
+ "all-linear"
278
+ ],
279
+ "target_regex": null,
280
+ "modules_to_save": [],
281
+ "lora_rank": 8,
282
+ "lora_alpha": 32,
283
+ "lora_dropout": 0.05,
284
+ "lora_bias": "none",
285
+ "lora_dtype": null,
286
+ "lorap_lr_ratio": null,
287
+ "use_rslora": false,
288
+ "use_dora": false,
289
+ "lora_ga_batch_size": 2,
290
+ "lora_ga_iters": 2,
291
+ "lora_ga_max_length": 1024,
292
+ "lora_ga_direction": "ArB2r",
293
+ "lora_ga_scale": "stable",
294
+ "lora_ga_stable_gamma": 16,
295
+ "init_weights": true,
296
+ "fourier_n_frequency": 2000,
297
+ "fourier_scaling": 300.0,
298
+ "boft_block_size": 4,
299
+ "boft_block_num": 0,
300
+ "boft_n_butterfly_factor": 1,
301
+ "boft_dropout": 0.0,
302
+ "vera_rank": 256,
303
+ "vera_projection_prng_key": 0,
304
+ "vera_dropout": 0.0,
305
+ "vera_d_initial": 0.1,
306
+ "adapter_act": "gelu",
307
+ "adapter_length": 128,
308
+ "use_galore": false,
309
+ "galore_target_modules": null,
310
+ "galore_rank": 128,
311
+ "galore_update_proj_gap": 50,
312
+ "galore_scale": 1.0,
313
+ "galore_proj_type": "std",
314
+ "galore_optim_per_parameter": false,
315
+ "galore_with_embedding": false,
316
+ "galore_quantization": false,
317
+ "galore_proj_quant": false,
318
+ "galore_proj_bits": 4,
319
+ "galore_proj_group_size": 256,
320
+ "galore_cos_threshold": 0.4,
321
+ "galore_gamma_proj": 2,
322
+ "galore_queue_size": 5,
323
+ "adalora_target_r": 8,
324
+ "adalora_init_r": 12,
325
+ "adalora_tinit": 0,
326
+ "adalora_tfinal": 0,
327
+ "adalora_deltaT": 1,
328
+ "adalora_beta1": 0.85,
329
+ "adalora_beta2": 0.85,
330
+ "adalora_orth_reg_weight": 0.5,
331
+ "llamapro_num_new_blocks": 4,
332
+ "llamapro_num_groups": null,
333
+ "lisa_activated_layers": 0,
334
+ "lisa_step_interval": 20,
335
+ "reft_layer_key": null,
336
+ "reft_layers": null,
337
+ "reft_rank": 4,
338
+ "reft_intervention_type": "LoreftIntervention",
339
+ "reft_args": null,
340
+ "swanlab_token": null,
341
+ "swanlab_project": null,
342
+ "swanlab_workspace": null,
343
+ "swanlab_exp_name": null,
344
+ "swanlab_mode": "cloud",
345
+ "add_version": true,
346
+ "resume_only_model": false,
347
+ "create_checkpoint_symlink": false,
348
+ "packing": false,
349
+ "lazy_tokenize": false,
350
+ "loss_type": null,
351
+ "optimizer": null,
352
+ "metric": null,
353
+ "zero_hpz_partition_size": null,
354
+ "rank": 0,
355
+ "global_world_size": 8,
356
+ "local_world_size": 8,
357
+ "model_suffix": "checkpoint-50-merged",
358
+ "model_info": "ModelInfo(model_type='qwen2_5', model_dir='/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged', torch_dtype=torch.bfloat16, max_model_len=32768, quant_method=None, quant_bits=None, rope_scaling=None, config=None, task_type='causal_lm', num_labels=None)",
359
+ "model_meta": "ModelMeta(model_type='qwen2_5', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct', hf_model_id='Qwen/Qwen2.5-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct', hf_model_id='Qwen/Qwen2.5-7B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct', hf_model_id='Qwen/Qwen2.5-14B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct', hf_model_id='Qwen/Qwen2.5-32B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct', hf_model_id='Qwen/Qwen2.5-72B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B', hf_model_id='Qwen/Qwen2.5-0.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B', hf_model_id='Qwen/Qwen2.5-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B', hf_model_id='Qwen/Qwen2.5-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B', hf_model_id='Qwen/Qwen2.5-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B', hf_model_id='Qwen/Qwen2.5-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B', hf_model_id='Qwen/Qwen2.5-32B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B', hf_model_id='Qwen/Qwen2.5-72B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-3B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, 
hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-7B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-14B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-72B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-0.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-0.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-1.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-1.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-3B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-3B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-7B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-7B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-14B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-14B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-32B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-32B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-72B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-72B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B', hf_model_id='Qwen/Qwen2.5-Coder-0.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B', hf_model_id='Qwen/Qwen2.5-Coder-1.5B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B', hf_model_id='Qwen/Qwen2.5-Coder-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B', hf_model_id='Qwen/Qwen2.5-Coder-7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B', hf_model_id='Qwen/Qwen2.5-Coder-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B', hf_model_id='Qwen/Qwen2.5-Coder-32B', model_path=None, ms_revision=None, hf_revision=None), 
Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8', hf_model_id='Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=['coding'])], template='qwen2_5', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f22ea5f5e10>, model_arch='llama', architectures=['Qwen2ForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=['transformers>=4.37'], tags=[])",
360
+ "model_dir": "/oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged",
361
+ "hub": "<class 'swift.hub.hub.MSHub'>",
362
+ "evaluation_strategy": "steps",
363
+ "training_args": "Seq2SeqTrainingArguments(output_dir='/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=2, per_device_eval_batch_size=2, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=4, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=1e-05, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3.0, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=5, save_total_limit=5, save_safetensors=True, save_on_each_node=False, save_only_model=True, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=5, dataloader_num_workers=1, dataloader_prefetch_factor=10, past_index=-1, run_name='/nas/shared/kilab/wangyujia/BIO/sft/qwen-production-08022302/v0-20250802-230250', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, tp_size=0, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': False, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'zero_quantized_weights': False, 'zero_quantized_gradients': False, 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=None, 
ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, check_model=True, acc_strategy='token', train_dataloader_shuffle=True, metric_warmup_step=0, fsdp_num=1, acc_steps=1, eval_use_evalscope=False, eval_datasets=[], eval_limit=None, eval_datasets_args=None, eval_generation_config=None, train_type='lora', optimizer=None, local_repo_path=None, galore_config=None)"
364
+ }
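`args.json` is the full ms-swift training record, so the effective setup can be read straight out of it: LoRA (r=8, alpha=32) on `all-linear`, lr 1e-5 with cosine decay and 5% warmup, 3 epochs, bf16 under DeepSpeed ZeRO-3 across 8 GPUs. A sketch using only the standard library:

```python
# Sketch: recover the effective training setup from args.json.
import json

with open("checkpoint-835/args.json") as f:  # path relative to the checkpoint dir
    args = json.load(f)

effective_batch = (
    args["per_device_train_batch_size"]      # 2
    * args["gradient_accumulation_steps"]    # 4
    * args["global_world_size"]              # 8 GPUs
)  # -> 64 sequences per optimizer step
print(args["train_type"], args["lora_rank"], args["lora_alpha"], args["learning_rate"])
# -> lora 8 32 1e-05
```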
EasyR1-new/.gitignore ADDED
@@ -0,0 +1,181 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # PyPI configuration file
171
+ .pypirc
172
+
173
+ # outputs
174
+ outputs/
175
+ checkpoints/
176
+ wandb/
177
+ tensorboard_log/
178
+
179
+ # data
180
+ images/
181
+ images*
EasyR1-new/.pre-commit-config.yaml ADDED
@@ -0,0 +1,22 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: check-ast
6
+ - id: check-added-large-files
7
+ args: ['--maxkb=25000']
8
+ - id: check-merge-conflict
9
+ - id: check-yaml
10
+ - id: debug-statements
11
+ - id: end-of-file-fixer
12
+ - id: requirements-txt-fixer
13
+ - id: trailing-whitespace
14
+ args: [--markdown-linebreak-ext=md]
15
+ - id: no-commit-to-branch
16
+ args: ['--branch', 'main']
17
+
18
+ - repo: https://github.com/asottile/pyupgrade
19
+ rev: v3.17.0
20
+ hooks:
21
+ - id: pyupgrade
22
+ args: [--py38-plus]
EasyR1-new/Dockerfile ADDED
@@ -0,0 +1,65 @@
1
+ # Start from the NVIDIA official image (ubuntu-22.04 + cuda-12.6 + python-3.10)
2
+ # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
3
+ FROM nvcr.io/nvidia/pytorch:24.08-py3
4
+
5
+ # Define environments
6
+ ENV MAX_JOBS=32
7
+ ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
8
+ ENV DEBIAN_FRONTEND=noninteractive
9
+ ENV NODE_OPTIONS=""
10
+ ENV PIP_ROOT_USER_ACTION=ignore
11
+ ENV HF_HUB_ENABLE_HF_TRANSFER="1"
12
+
13
+ # Define installation arguments
14
+ ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
15
+ ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
16
+
17
+ # Set apt source
18
+ RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
19
+ { \
20
+ echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
21
+ echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
22
+ echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
23
+ echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
24
+ } > /etc/apt/sources.list
25
+
26
+ # Install systemctl
27
+ RUN apt-get update && \
28
+ apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
29
+ apt-get clean
30
+
31
+ # Install tini
32
+ RUN apt-get update && \
33
+ apt-get install -y tini && \
34
+ apt-get clean
35
+
36
+ # Change pip source
37
+ RUN pip config set global.index-url "${PIP_INDEX}" && \
38
+ pip config set global.extra-index-url "${PIP_INDEX}" && \
39
+ python -m pip install --upgrade pip
40
+
41
+ # Uninstall nv-pytorch fork
42
+ RUN pip uninstall -y torch torchvision torchaudio \
43
+ pytorch-quantization pytorch-triton torch-tensorrt \
44
+ transformer-engine flash-attn apex megatron-core \
45
+ xgboost opencv grpcio
46
+
47
+ # Fix cv2
48
+ RUN rm -rf /usr/local/lib/python3.10/dist-packages/cv2
49
+
50
+ # Install torch-2.7.0+cu126 + vllm-0.9.1
51
+ RUN pip install --no-cache-dir "vllm==0.9.1" "torch==2.7.0" "torchvision==0.22.0" "torchaudio==2.7.0" tensordict torchdata \
52
+ "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
53
+ "numpy<2.0.0" "pyarrow>=15.0.0" "grpcio>=1.62.1" "optree>=0.13.0" pandas \
54
+ ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
55
+ pytest yapf py-spy pyext pre-commit ruff
56
+
57
+ # Install flash-attn-2.8.0.post2
58
+ RUN ABI_FLAG=$(python -c "import torch; print('TRUE' if torch._C._GLIBCXX_USE_CXX11_ABI else 'FALSE')") && \
59
+ URL="https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.0.post2/flash_attn-2.8.0.post2+cu12torch2.7cxx11abi${ABI_FLAG}-cp310-cp310-linux_x86_64.whl" && \
60
+ wget -nv -P /opt/tiger "${URL}" && \
61
+ pip install --no-cache-dir "/opt/tiger/$(basename ${URL})"
62
+
63
+ # Reset pip config
64
+ RUN pip config unset global.index-url && \
65
+ pip config unset global.extra-index-url
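
A minimal build sketch for the Dockerfile above; the image tag is arbitrary and the build-arg values are illustrative alternatives to the default Tsinghua mirrors:

```bash
docker build -t easyr1:latest .

# Or override the apt/pip mirrors defined as ARGs in the Dockerfile
docker build \
  --build-arg APT_SOURCE=http://archive.ubuntu.com/ubuntu/ \
  --build-arg PIP_INDEX=https://pypi.org/simple \
  -t easyr1:latest .
```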
EasyR1-new/Dockerfile.legacy ADDED
@@ -0,0 +1,72 @@
+ # Start from the NVIDIA official image (ubuntu-22.04 + cuda-12.6 + python-3.10)
+ # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-08.html
+ FROM nvcr.io/nvidia/pytorch:24.08-py3
+
+ # Define environments
+ ENV MAX_JOBS=32
+ ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+ ENV DEBIAN_FRONTEND=noninteractive
+ ENV NODE_OPTIONS=""
+ ENV PIP_ROOT_USER_ACTION=ignore
+ ENV HF_HUB_ENABLE_HF_TRANSFER="1"
+
+ # Define installation arguments
+ ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
+ ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+ # Set apt source
+ RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+     { \
+       echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
+       echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
+       echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
+       echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
+     } > /etc/apt/sources.list
+
+ # Install systemctl
+ RUN apt-get update && \
+     apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
+     apt-get clean
+
+ # Install tini
+ RUN apt-get update && \
+     apt-get install -y tini && \
+     apt-get clean
+
+ # Change pip source
+ RUN pip config set global.index-url "${PIP_INDEX}" && \
+     pip config set global.extra-index-url "${PIP_INDEX}" && \
+     python -m pip install --upgrade pip
+
+ # Uninstall nv-pytorch fork
+ RUN pip uninstall -y torch torchvision torchaudio \
+     pytorch-quantization pytorch-triton torch-tensorrt \
+     transformer_engine flash_attn apex megatron-core \
+     xgboost opencv grpcio
+
+ # Fix cv2
+ RUN rm -rf /usr/local/lib/python3.10/dist-packages/cv2
+
+ # Install torch-2.6.0+cu124 + vllm-0.8.4
+ # torch-2.6.0+cu124: cxx11abi=False
+ # torch-2.6.0+cu126: cxx11abi=True
+ # see https://github.com/flashinfer-ai/flashinfer/issues/911
+ RUN pip install --no-cache-dir "vllm==0.8.4" "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" tensordict torchdata \
+     "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
+     "numpy<2.0.0" "pyarrow>=15.0.0" "grpcio>=1.62.1" "optree>=0.13.0" pandas \
+     ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb liger-kernel mathruler \
+     pytest yapf py-spy pyext pre-commit ruff
+
+ # Install flash-attn-2.7.4.post1 (cxx11abi=False)
+ RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
+     pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+
+ # Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
+ # vllm-0.8.3 does not support flashinfer>=0.2.3
+ # see https://github.com/vllm-project/vllm/pull/15777
+ RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
+     pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
+
+ # Reset pip config
+ RUN pip config unset global.index-url && \
+     pip config unset global.extra-index-url
EasyR1-new/LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
EasyR1-new/Makefile ADDED
@@ -0,0 +1,24 @@
+ .PHONY: build commit license quality style test
+
+ check_dirs := examples scripts tests verl setup.py
+
+ build:
+ 	python3 setup.py sdist bdist_wheel
+
+ commit:
+ 	pre-commit install
+ 	pre-commit run --all-files
+
+ license:
+ 	python3 tests/check_license.py $(check_dirs)
+
+ quality:
+ 	ruff check $(check_dirs)
+ 	ruff format --check $(check_dirs)
+
+ style:
+ 	ruff check $(check_dirs) --fix
+ 	ruff format $(check_dirs)
+
+ test:
+ 	pytest -vv tests/
EasyR1-new/README.md ADDED
@@ -0,0 +1,223 @@
+ # EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
+
+ [![GitHub Repo stars](https://img.shields.io/github/stars/hiyouga/EasyR1)](https://github.com/hiyouga/EasyR1/stargazers)
+ [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
+
+ ### Used by [Amazon Web Services](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/)
+
+ This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models; we thank all the authors for providing such a high-performance RL training framework.
+
+ EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest release of **[vLLM](https://github.com/vllm-project/vllm)**'s SPMD mode.
+
+ ## Features
+
+ - Supported models
+   - Llama3/Qwen2/Qwen2.5/Qwen3 language models
+   - Qwen2/Qwen2.5-VL vision language models
+   - DeepSeek-R1 distill models
+
+ - Supported algorithms
+   - GRPO
+   - DAPO
+   - Reinforce++
+   - ReMax
+   - RLOO
+
+ - Supported datasets
+   - Any text or vision-text dataset in a [specific format](#custom-dataset)
+
+ - Supported tricks
+   - Padding-free training
+   - Resuming from checkpoint
+   - Wandb & SwanLab & MLflow & TensorBoard tracking
+
+ ## Requirements
+
+ ### Software Requirements
+
+ - Python 3.9+
+ - transformers>=4.51.0
+ - flash-attn>=2.4.3
+ - vllm>=0.8.3
+
+ We provide a [Dockerfile](./Dockerfile) to easily build environments.
+
+ We recommend using the [pre-built docker image](https://hub.docker.com/r/hiyouga/verl) in EasyR1.
+
+ ```bash
+ docker pull hiyouga/verl:ngc-th2.7.0-cu12.6-vllm0.9.1
+ ```
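+
+ A minimal, untested sketch of launching a container from this image (the GPU, IPC, and mount flags below are common choices, not EasyR1-specific requirements):
+
+ ```bash
+ docker run -it --gpus all --ipc=host \
+   -v "$PWD":/workspace/EasyR1 \
+   hiyouga/verl:ngc-th2.7.0-cu12.6-vllm0.9.1
+ ```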
+
+ ### Hardware Requirements
+
+ \* *estimated*
+
+ | Method                | Bits | 1.5B   | 3B     | 7B     | 32B     | 72B     |
+ | --------------------- | ---- | ------ | ------ | ------ | ------- | ------- |
+ | GRPO Full Fine-Tuning | AMP  | 2*24GB | 4*40GB | 8*40GB | 16*80GB | 32*80GB |
+ | GRPO Full Fine-Tuning | BF16 | 1*24GB | 1*40GB | 4*40GB | 8*80GB  | 16*80GB |
+
+ > [!NOTE]
+ > Use `worker.actor.fsdp.torch_dtype=bf16` and `worker.actor.optim.strategy=adamw_bf16` to enable bf16 training.
+ >
+ > We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in the next updates.
+
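+ A minimal sketch of the bf16 overrides as they might appear on a training command; the entry point and config path here are illustrative, and only the two override keys come from the note above:
+
+ ```bash
+ # Illustrative entry point and config path; adjust to your setup
+ python3 -m verl.trainer.main \
+     config=examples/config.yaml \
+     worker.actor.fsdp.torch_dtype=bf16 \
+     worker.actor.optim.strategy=adamw_bf16
+ ```
+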
+ ## Tutorial: Run Qwen2.5-VL GRPO on the [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just 3 Steps
+
+ ![image](assets/qwen2_5_vl_7b_geo.png)
+
+ ### Installation
+
+ ```bash
+ git clone https://github.com/hiyouga/EasyR1.git
+ cd EasyR1
+ pip install -e .
+ ```
+
+ ### GRPO Training
+
+ ```bash
+ bash examples/qwen2_5_vl_7b_geo3k_grpo.sh
+ ```
+
+ ### Merge Checkpoint in Hugging Face Format
+
+ ```bash
+ python3 scripts/model_merger.py --local_dir checkpoints/easy_r1/exp_name/global_step_1/actor
+ ```
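+
+ After merging, the result can be loaded like any Hugging Face checkpoint. A minimal, untested sketch for a language-model checkpoint (the path is a placeholder for wherever `model_merger.py` wrote the merged weights; vision-language models need their corresponding model classes instead):
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ path = "path/to/merged_checkpoint"  # placeholder: the merged output directory
+ tokenizer = AutoTokenizer.from_pretrained(path)
+ model = AutoModelForCausalLM.from_pretrained(path, torch_dtype="auto", device_map="auto")
+
+ inputs = tokenizer("1 + 1 =", return_tensors="pt").to(model.device)
+ print(tokenizer.decode(model.generate(**inputs, max_new_tokens=16)[0]))
+ ```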
+
+ > [!TIP]
+ > If you encounter issues with connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
+ >
+ > If you want to use the SwanLab logger, consider using `bash examples/qwen2_5_vl_7b_geo3k_swanlab.sh`.
+
+ ## Custom Dataset
+
+ Please refer to the example datasets below to prepare your own dataset (see the loading sketch after this list).
+
+ - Text dataset: https://huggingface.co/datasets/hiyouga/math12k
+ - Image-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
+ - Multi-image-text dataset: https://huggingface.co/datasets/hiyouga/journeybench-multi-image-vqa
+ - Text-image mixed dataset: https://huggingface.co/datasets/hiyouga/rl-mixed-dataset
+
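+ A quick way to see the expected fields is to load one of the reference datasets and inspect a sample (a minimal sketch using the `datasets` library):
+
+ ```python
+ from datasets import load_dataset
+
+ # Inspect the image-text reference dataset listed above
+ dataset = load_dataset("hiyouga/geometry3k", split="train")
+ print(dataset.features)  # column names and types
+ print(dataset[0])        # one example record
+ ```
+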
+ ## How to Understand GRPO in EasyR1
+
+ ![image](assets/easyr1_grpo.png)
+
+ - To learn about the GRPO algorithm, you can refer to [Hugging Face's blog](https://huggingface.co/docs/trl/v0.16.1/en/grpo_trainer).
+
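+ As a rough schematic (not EasyR1's exact implementation), GRPO replaces the learned value model with group-normalized rewards: several responses are sampled per prompt, and each response's standardized reward within its group serves as the advantage for all of its tokens:
+
+ ```python
+ import torch
+
+ def grpo_advantages(rewards: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
+     """rewards: (num_prompts, group_size) scalar reward per sampled response."""
+     mean = rewards.mean(dim=-1, keepdim=True)
+     std = rewards.std(dim=-1, keepdim=True)
+     return (rewards - mean) / (std + eps)
+
+ # e.g. 4 rollouts for one prompt, two of them rewarded
+ print(grpo_advantages(torch.tensor([[0.0, 1.0, 1.0, 0.0]])))
+ ```
+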
+ ## How to Run 70B+ Model in Multi-node Environment
+
+ 1. Start the Ray head node.
+
+    ```bash
+    ray start --head --port=6379 --dashboard-host=0.0.0.0
+    ```
+
+ 2. Start the Ray worker node and connect to the head node.
+
+    ```bash
+    ray start --address=<head_node_ip>:6379
+    ```
+
+ 3. Check the Ray resource pool.
+
+    ```bash
+    ray status
+    ```
+
+ 4. Run the training script on the Ray head node only.
+
+    ```bash
+    bash examples/qwen2_5_vl_7b_geo3k_grpo.sh
+    ```
+
+ See **[veRL's official doc](https://verl.readthedocs.io/en/latest/start/multinode.html)** for more details about multi-node training and the Ray debugger.
+
+ ## Other Baselines
+
+ We also reproduced the following two baselines of the [R1-V](https://github.com/deep-agent/R1-V) project.
+ - [CLEVR-70k-Counting](examples/baselines/qwen2_5_vl_3b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
+ - [GeoQA-8k](examples/baselines/qwen2_5_vl_3b_geoqa8k.sh): Train the Qwen2.5-VL-3B-Instruct model on the GeoQA problem.
+
+ ## Performance Baselines
+
+ See [baselines.md](assets/baselines.md).
+
+ ## Awesome Work using EasyR1
+
+ - **MMR1**: Advancing the Frontiers of Multimodal Reasoning. [![[code]](https://img.shields.io/github/stars/LengSicong/MMR1)](https://github.com/LengSicong/MMR1)
+ - **Vision-R1**: Incentivizing Reasoning Capability in Multimodal Large Language Models. [![[code]](https://img.shields.io/github/stars/Osilly/Vision-R1)](https://github.com/Osilly/Vision-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06749-blue)](https://arxiv.org/abs/2503.06749)
+ - **Seg-Zero**: Reasoning-Chain Guided Segmentation via Cognitive Reinforcement. [![[code]](https://img.shields.io/github/stars/dvlab-research/Seg-Zero)](https://github.com/dvlab-research/Seg-Zero) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06520-blue)](https://arxiv.org/abs/2503.06520)
+ - **MetaSpatial**: Reinforcing 3D Spatial Reasoning in VLMs for the Metaverse. [![[code]](https://img.shields.io/github/stars/PzySeere/MetaSpatial)](https://github.com/PzySeere/MetaSpatial) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.18470-blue)](https://arxiv.org/abs/2503.18470)
+ - **Temporal-R1**: Evolving Temporal Reasoning Capability into LMMs via Temporal Consistent Reward. [![[code]](https://img.shields.io/github/stars/appletea233/Temporal-R1)](https://github.com/appletea233/Temporal-R1)
+ - **NoisyRollout**: Reinforcing Visual Reasoning with Data Augmentation. [![[code]](https://img.shields.io/github/stars/John-AI-Lab/NoisyRollout)](https://github.com/John-AI-Lab/NoisyRollout) [![[arxiv]](https://img.shields.io/badge/arxiv-2504.13055-blue)](https://arxiv.org/pdf/2504.13055)
+ - **GUI-R1**: A Generalist R1-Style Vision-Language Action Model For GUI Agents. [![[code]](https://img.shields.io/github/stars/ritzz-ai/GUI-R1)](https://github.com/ritzz-ai/GUI-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2504.10458-blue)](https://arxiv.org/abs/2504.10458)
+ - **R1-Track**: Direct Application of MLLMs to Visual Object Tracking via Reinforcement Learning. [![[code]](https://img.shields.io/github/stars/Wangbiao2/R1-Track)](https://github.com/Wangbiao2/R1-Track)
+ - **VisionReasoner**: Unified Visual Perception and Reasoning via Reinforcement Learning. [![[code]](https://img.shields.io/github/stars/dvlab-research/VisionReasoner)](https://github.com/dvlab-research/VisionReasoner) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.12081-blue)](https://arxiv.org/abs/2505.12081)
+ - **MM-UPT**: Unsupervised Post-Training for Multi-Modal LLM Reasoning via GRPO. [![[code]](https://img.shields.io/github/stars/waltonfuture/MM-UPT)](https://github.com/waltonfuture/MM-UPT) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.22453-blue)](https://arxiv.org/pdf/2505.22453)
+ - **RL-with-Cold-Start**: Advancing Multimodal Reasoning via Reinforcement Learning with Cold Start. [![[code]](https://img.shields.io/github/stars/waltonfuture/RL-with-Cold-Start)](https://github.com/waltonfuture/RL-with-Cold-Start) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.22334-blue)](https://arxiv.org/pdf/2505.22334)
+ - **ViGoRL**: Grounded Reinforcement Learning for Visual Reasoning. [![[code]](https://img.shields.io/github/stars/Gabesarch/grounded-rl)](https://github.com/Gabesarch/grounded-rl) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.23678-blue)](https://arxiv.org/abs/2505.23678)
+ - **Revisual-R1**: Advancing Multimodal Reasoning: From Optimized Cold Start to Staged Reinforcement Learning. [![[code]](https://img.shields.io/github/stars/CSfufu/Revisual-R1)](https://github.com/CSfufu/Revisual-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2506.04207-blue)](https://arxiv.org/abs/2506.04207)
+ - **SophiaVL-R1**: Reinforcing MLLMs Reasoning with Thinking Reward. [![[code]](https://img.shields.io/github/stars/kxfan2002/SophiaVL-R1)](https://github.com/kxfan2002/SophiaVL-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.17018-blue)](https://arxiv.org/abs/2505.17018)
+ - **Vision-Matters**: Simple Visual Perturbations Can Boost Multimodal Math Reasoning. [![[code]](https://img.shields.io/github/stars/YutingLi0606/Vision-Matters)](https://github.com/YutingLi0606/Vision-Matters) [![[arxiv]](https://img.shields.io/badge/arxiv-2506.09736-blue)](https://arxiv.org/abs/2506.09736)
+ - **VTool-R1**: VLMs Learn to Think with Images via Reinforcement Learning on Multimodal Tool Use. [![[code]](https://img.shields.io/github/stars/VTOOL-R1/vtool-r1)](https://github.com/VTOOL-R1/vtool-r1) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.19255-blue)](https://arxiv.org/abs/2505.19255)
+
+ ## TODO
+
+ - Support LoRA (high priority).
+ - Support ulysses parallelism for VLMs (medium priority).
+ - Support more VLM architectures.
+
+ > [!NOTE]
+ > We will not provide scripts for supervised fine-tuning and inference in this project. If you have such requirements, we recommend using [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory).
+
+ ### Known bugs
+
+ These features are temporarily disabled; we plan to fix them one by one in future updates.
+
+ - Vision language models are not compatible with ulysses parallelism yet.
+
+ ## Discussion Group
+
+ 👋 Join our [WeChat group](assets/wechat.jpg).
+
+ ## FAQs
+
+ > ValueError: Image features and image tokens do not match: tokens: 8192, features 9800
+
+ Increase the `data.max_prompt_length` or reduce the `data.max_pixels`.
+
+ > RuntimeError: CUDA Error: out of memory at /workspace/csrc/cumem_allocator.cpp:62
+
+ Reduce the `worker.rollout.gpu_memory_utilization` and enable `worker.actor.offload.offload_params`.
+
+ > RuntimeError: 0 active drivers ([]). There should only be one.
+
+ Uninstall `deepspeed` from the current Python environment.
+
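+ For the out-of-memory case, a sketch of how those two overrides might be passed on the command line (the entry point, config path, and the 0.5 value are illustrative):
+
+ ```bash
+ python3 -m verl.trainer.main \
+     config=examples/config.yaml \
+     worker.rollout.gpu_memory_utilization=0.5 \
+     worker.actor.offload.offload_params=true
+ ```
+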
+ ## Citation
+
+ Core contributors: [Yaowei Zheng](https://github.com/hiyouga), [Junting Lu](https://github.com/AL-377), [Shenzhi Wang](https://github.com/Shenzhi-Wang), [Zhangchi Feng](https://github.com/BUAADreamer), [Dongdong Kuang](https://github.com/Kuangdd01) and Yuwen Xiong.
+
+ We also thank Guangming Sheng and Chi Zhang for helpful discussions.
+
+ ```bibtex
+ @misc{zheng2025easyr1,
+   title = {EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework},
+   author = {Yaowei Zheng and Junting Lu and Shenzhi Wang and Zhangchi Feng and Dongdong Kuang and Yuwen Xiong},
+   howpublished = {\url{https://github.com/hiyouga/EasyR1}},
+   year = {2025}
+ }
+ ```
+
+ We also recommend citing the original work.
+
+ ```bibtex
+ @article{sheng2024hybridflow,
+   title = {HybridFlow: A Flexible and Efficient RLHF Framework},
+   author = {Guangming Sheng and Chi Zhang and Zilingfeng Ye and Xibin Wu and Wang Zhang and Ru Zhang and Yanghua Peng and Haibin Lin and Chuan Wu},
+   year = {2024},
+   journal = {arXiv preprint arXiv: 2409.19256}
+ }
+ ```
EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/media/table/val/generations_0_2feffd203d182aadef48.table.json ADDED
@@ -0,0 +1 @@
+ {"columns": ["step", "input_1", "output_1", "label_1", "score_1", "input_2", "output_2", "label_2", "score_2", "input_3", "output_3", "label_3", "score_3"], "data": [[0, "system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.\nuser\nGiven the protein amino acid sequence: PNAIVNRSIPEGVDVFEISGPLFFGAASKFKDAMHVVEKAPSIRILRMRKVMSIDATGLNMLKELFNDCRKSGTTLILSGVHTQPLFAMQQYGLADEIGEENIFGNIDDALDRARSLLGLPVQGRPAGF, does the protein have the property of solubility?\nOptions:\n0. Insoluble.\n1. Soluble. You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE output within <answer> </answer> tag.\nassistant\n", "<think>\nThe protein solubility can be predicted by the grand average of hydropathicity (GRAVY) scale. The GRAVY scale is based on the assumption that a protein with a large number of hydrophilic residues will have a negative value and a protein with a large number of hydrophobic residues will have a positive value. The GRAVY scale is calculated by summing the hydropathy values of all amino acid residues in a protein sequence and dividing by the number of residues in the sequence. The hydropathy value of an amino acid residue is a measure of its hydrophobicity, which is based on its chemical properties. The hydropathy value of an amino acid residue can be calculated using a hydrophobicity scale, which is a table that assigns a numerical value to each amino acid residue based on its hydrophobicity. The amino acid sequence of the protein is: PNAIVNRSIPEGVDVFEISGPLFFGAASKFKDAMHVVEKAPSIRILRMRKVMSIDATGLNMLKELFNDCRKSGTTLILSGVHTQPLFAMQQYGLADEIGEENIFGNIDDALDRARSLLGLPVQGRPAGF. The hydropathy value of each amino acid residue in the sequence is: 0.057, 0.069, 0.08, 0.077, 0.038, 0.05, 0.065, 0.07, 0.057, 0.056, 0.08, 0.059, 0.06, 0.057, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 
0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, ", 0, 0.0, "system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.\nuser\nGiven the protein amino acid sequence: MTYVLAESPEFYQDNVTDYTGQISSSDITNIQAAIDDVKASEQKVIFVVFLSSFDGVDPETWTQQALQANGGGNVLIYALAPEERQYGIQGGTQWTDAELDAANNAAFQALSQEDWAGSALALAESVGSSSSSSLEHHHHHH, does the protein have the property of solubility?\nOptions:\n0. Insoluble.\n1. Soluble. You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE output within <answer> </answer> tag.\nassistant\n", "<think>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n</think>\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. 
The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. 
Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. 
In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. 
The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. 
Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydroph", 0, 0.9835715293884277, "system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.\nuser\nGiven the protein amino acid sequence: MGHHHHHHSHMDSKAIAERIKKLQAQQEHVRIGGKGTPRRKKKVIHKTAAADDKKLQSNLKKLSVTNIPGIEEVNMIKDDGTVIHFNNPKVQTSVPANTFSVTGSADNKQITEMLPGILNQLGPESLTHLKKLANNVTKLGPDGKGEDEDVPELVGDFDAASKNETKADEQ, does the protein have the property of solubility?\nOptions:\n0. Insoluble.\n1. Soluble. You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE output within <answer> </answer> tag.\nassistant\n", "<think>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins tend to be more soluble than hydrophobic proteins. The presence of a large number of hydrophobic amino acids, such as phenylalanine, tryptophan, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 11 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. 
The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. 
The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. 
The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. 
The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine", 0, 0.0]]}
EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/requirements.txt ADDED
@@ -0,0 +1,216 @@
+ setproctitle==1.2.2
+ psutil==7.0.0
+ colorama==0.4.6
+ scipy==1.15.3
+ protobuf==4.25.8
+ xxhash==3.5.0
+ tiktoken==0.9.0
+ setproctitle==1.3.6
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
+ pyasn1==0.6.1
+ typing_extensions==4.14.0
+ xgrammar==0.1.18
+ frozenlist==1.7.0
+ gguf==0.17.0
+ tzdata==2025.2
+ interegular==0.3.3
+ nvidia-cusparselt-cu12==0.6.2
+ nvidia-cusparse-cu12==12.3.1.170
+ filelock==3.18.0
+ wrapt==1.17.2
+ multiprocess==0.70.16
+ certifi==2025.4.26
+ email_validator==2.2.0
+ ninja==1.11.1.4
+ cupy-cuda12x==13.4.1
+ fsspec==2025.3.0
+ grpcio==1.73.0
+ referencing==0.36.2
+ exceptiongroup==1.3.0
+ httpcore==1.0.9
+ tqdm==4.67.1
+ torchvision==0.21.0
+ omegaconf==2.3.0
+ tensordict==0.8.3
+ nvidia-cusolver-cu12==11.6.1.9
+ pycountry==24.6.1
+ aiohappyeyeballs==2.6.1
+ aiohttp-cors==0.8.1
+ pydantic==2.11.6
+ python-dotenv==1.1.0
+ propcache==0.3.2
+ platformdirs==4.3.8
+ googleapis-common-protos==1.70.0
+ vllm==0.8.5.post1
+ modelscope==1.27.0
+ GitPython==3.1.44
+ packaging==25.0
+ fastapi==0.115.12
+ smmap==5.0.2
+ fastapi-cli==0.0.7
+ huggingface-hub==0.33.0
+ peft==0.15.2
+ wandb==0.20.1
+ cachetools==5.5.2
+ aiosignal==1.3.2
+ astor==0.8.1
+ msgpack==1.1.1
+ cloudpickle==3.1.1
+ attrs==25.3.0
+ google-auth==2.40.3
+ ray==2.47.0
+ opentelemetry-exporter-otlp-proto-common==1.26.0
+ flash-attn==2.7.1.post1
+ codetiming==1.4.0
+ typing-inspection==0.4.1
+ rich-toolkit==0.14.7
+ regex==2024.11.6
+ einops==0.8.1
+ nvidia-nvtx-cu12==12.4.127
+ torchdata==0.11.0
+ orjson==3.10.18
+ opentelemetry-exporter-otlp==1.26.0
+ requests==2.32.4
+ yarl==1.20.1
+ six==1.17.0
+ compressed-tensors==0.9.3
+ nvidia-cudnn-cu12==9.1.0.70
+ pydantic_core==2.33.2
+ rsa==4.9.1
+ fastrlock==0.8.3
+ prometheus-fastapi-instrumentator==7.1.0
+ psutil==7.0.0
+ async-timeout==5.0.1
+ msgspec==0.19.0
+ nvidia-cufile-cu12==1.11.1.6
+ sentencepiece==0.2.0
+ rpds-py==0.25.1
+ mathruler==0.1.0
+ opentelemetry-proto==1.26.0
+ pyarrow==20.0.0
+ mdurl==0.1.2
+ python-json-logger==3.3.0
+ python-dateutil==2.9.0.post0
+ numpy==2.2.6
+ markdown-it-py==3.0.0
+ tokenizers==0.21.1
+ wheel==0.45.1
+ gitdb==4.0.12
+ nvidia-nvjitlink-cu12==12.4.127
+ urllib3==2.4.0
+ airportsdata==20250523
+ prometheus_client==0.22.1
+ safetensors==0.5.3
+ setuptools==78.1.1
+ opentelemetry-api==1.26.0
+ liger_kernel==0.5.10
+ click==8.2.1
+ mpmath==1.3.0
+ dill==0.3.8
+ PyYAML==6.0.2
+ outlines_core==0.1.26
+ jsonschema-specifications==2025.4.1
+ nvidia-curand-cu12==10.3.5.147
+ zipp==3.23.0
+ triton==3.2.0
+ python-multipart==0.0.20
+ distlib==0.3.9
+ transformers==4.52.4
+ pandas==2.3.0
+ uvloop==0.21.0
+ opencensus==0.11.4
+ opencensus-context==0.1.3
+ numba==0.61.2
+ dnspython==2.7.0
+ sympy==1.13.1
+ pillow==11.2.1
+ idna==3.10
+ nvidia-cuda-runtime-cu12==12.4.127
+ sniffio==1.3.1
+ antlr4-python3-runtime==4.9.3
+ annotated-types==0.7.0
+ uvicorn==0.34.3
+ Pygments==2.19.1
+ nvidia-cuda-cupti-cu12==12.4.127
+ opentelemetry-exporter-prometheus==0.55b1
+ h11==0.16.0
+ depyf==0.18.0
+ colorful==0.5.6
+ Jinja2==3.1.6
+ nvidia-cuda-nvrtc-cu12==12.4.127
+ shellingham==1.5.4
+ mistral_common==1.6.2
+ partial-json-parser==0.2.1.1.post5
+ lm-format-enforcer==0.10.11
+ pyzmq==26.4.0
+ datasets==3.6.0
+ httpx==0.28.1
+ charset-normalizer==3.4.2
+ hf-xet==1.1.3
+ av==14.4.0
+ qwen-vl-utils==0.0.11
+ opencv-python-headless==4.11.0.86
+ nvidia-nccl-cu12==2.21.5
+ opentelemetry-semantic-conventions-ai==0.4.9
+ py-cpuinfo==9.0.0
+ nest-asyncio==1.6.0
+ blake3==1.0.5
+ pytz==2025.2
+ proto-plus==1.26.1
+ pylatexenc==2.10
+ websockets==15.0.1
+ starlette==0.46.2
+ nvidia-cublas-cu12==12.4.5.8
+ importlib_metadata==8.0.0
+ aiohttp==3.12.12
+ anyio==4.9.0
+ llvmlite==0.44.0
+ sentry-sdk==2.30.0
+ multidict==6.4.4
+ verl==0.3.1.dev0
+ distro==1.9.0
+ torch==2.6.0
+ Deprecated==1.2.18
+ MarkupSafe==3.0.2
+ torchaudio==2.6.0
+ nvidia-cufft-cu12==11.2.1.3
+ networkx==3.4.2
+ httptools==0.6.4
+ opentelemetry-sdk==1.26.0
+ pyasn1_modules==0.4.2
+ opentelemetry-exporter-otlp-proto-http==1.26.0
+ google-api-core==2.25.1
+ diskcache==5.6.3
+ rich==14.0.0
+ virtualenv==20.31.2
+ outlines==0.1.11
+ llguidance==0.7.29
+ py-spy==0.4.0
+ lark==1.2.2
+ typer==0.16.0
+ jiter==0.10.0
+ opentelemetry-semantic-conventions==0.47b0
+ jsonschema==4.24.0
+ watchfiles==1.0.5
+ pip==25.1
+ smart-open==7.1.0
+ openai==1.86.0
+ accelerate==1.7.0
+ xformers==0.0.29.post2
+ verl==0.3.1.dev0
+ autocommand==2.2.2
+ jaraco.functools==4.0.1
+ typeguard==4.3.0
+ platformdirs==4.2.2
+ jaraco.text==3.12.1
+ wheel==0.45.1
+ inflect==7.3.1
+ jaraco.context==5.3.0
+ jaraco.collections==5.1.0
+ packaging==24.2
+ more-itertools==10.3.0
+ typing_extensions==4.12.2
+ importlib_metadata==8.0.0
+ backports.tarfile==1.2.0
+ zipp==3.19.2
+ tomli==2.0.1
EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/wandb-metadata.json ADDED
@@ -0,0 +1,91 @@
+ {
+ "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
+ "python": "CPython 3.10.0",
+ "startedAt": "2025-06-14T11:33:20.797563Z",
+ "args": [
+ "--node-ip-address=10.1.0.34",
+ "--node-manager-port=44395",
+ "--object-store-name=/tmp/ray/session_2025-06-14_19-26-10_939959_377/sockets/plasma_store",
+ "--raylet-name=/tmp/ray/session_2025-06-14_19-26-10_939959_377/sockets/raylet",
+ "--redis-address=None",
+ "--metrics-agent-port=53248",
+ "--logging-rotate-bytes=536870912",
+ "--logging-rotate-backup-count=5",
+ "--runtime-env-agent-port=62639",
+ "--gcs-address=10.1.0.34:54500",
+ "--session-name=session_2025-06-14_19-26-10_939959_377",
+ "--temp-dir=/tmp/ray",
+ "--webui=127.0.0.1:8265",
+ "--cluster-id=87252e5ee41fad91c76da7e57bdcecbe022b796f41d09458b8a10f7c",
+ "--startup-token=24",
+ "--worker-launch-time-ms=1749900374194",
+ "--node-id=55695a579dfa13036c97ee61aacacb1a3bcb84cbe0636297d5a4b61b",
+ "--runtime-env-hash=-115784934",
+ "--enable-resource-isolation=false"
+ ],
+ "program": "/root/miniconda3/envs/easyr1/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
+ "email": "gia0603yucca@gmail.com",
+ "root": "/nas/shared/kilab/wangyujia/EasyR1/examples",
+ "host": "dlcjqxpfs58ebbif-master-0",
+ "executable": "/root/miniconda3/envs/easyr1/bin/python3",
+ "cpu_count": 24,
+ "cpu_count_logical": 24,
+ "gpu": "NVIDIA A800-SXM4-80GB",
+ "gpu_count": 8,
+ "disk": {
+ "/": {
+ "total": "1623302262784",
+ "used": "93401088"
+ }
+ },
+ "memory": {
+ "total": "549755813888"
+ },
+ "cpu": {
+ "count": 24,
+ "countLogical": 24
+ },
+ "gpu_nvidia": [
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-6074818b-3292-5382-c329-f6ea4933c000"
+ },
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-9153a1c2-42e3-a4bf-5b99-c511c2dd1cd7"
+ },
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-d3362bbd-73d7-9f93-9e24-20b1dde2c73d"
+ },
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-0a7ad2e1-b610-b966-bb15-97dee8beda1d"
+ },
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-71868de1-0c70-4e5f-ff0f-c6c77d6ff52a"
+ },
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-91ff7b89-a1a9-22b7-e7a6-d280378f4c22"
+ },
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-6da63e94-6ad8-9210-fcc7-9803f04b4499"
+ },
+ {
+ "name": "NVIDIA A800-SXM4-80GB",
+ "architecture": "Ampere",
+ "uuid": "GPU-17618b8c-49d9-1302-2d8b-8e2d3323f951"
+ }
+ ],
+ "cudaVersion": "12.1"
+ }
EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"val/reward_score":0.42200788855552673,"_runtime":2222.63040867,"val/accuracy_reward":0,"val/overall_reward":0.4220078922653015,"_timestamp":1.7499027736309972e+09,"val/repeat_reward":0.4220078922653015,"_step":0,"_wandb":{"runtime":2222},"val/format_reward":0,"val/generations":{"artifact_path":"wandb-client-artifact://ijyj2h6rsizx63o43gkuzc4ss8t0t749zor61i1pqle3t4c3ibf0xivv691aymsesb6ig7a9fuappbvthpwjudrcl39qby5t4etler4em69qzxjpizsh8ela68r5ocgs/val/generations.table.json","sha256":"2feffd203d182aadef48f52d1efd60785cdc0e27cfd8e96c5ed06b0f422b9a10","_latest_artifact_path":"wandb-client-artifact://l4v9x108g3wt2q2co8z0amg8u4xglpreu8janqpe0gu6vsl28ihxry8uc140267pdit8ni6m32ykh8gkouuhluxdqm3n1ipk3vzgp4x0kh75u6xtedujh1a0uh4vonfq:latest/val/generations.table.json","path":"media/table/val/generations_0_2feffd203d182aadef48.table.json","ncols":13,"log_mode":"IMMUTABLE","_type":"table-file","nrows":1,"size":37088}}
EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
+ {"time":"2025-06-14T19:33:20.8065764+08:00","level":"INFO","msg":"stream: starting","core version":"0.20.1","symlink path":"/nas/shared/kilab/wangyujia/EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/logs/debug-core.log"}
+ {"time":"2025-06-14T19:33:22.123634875+08:00","level":"INFO","msg":"stream: created new stream","id":"9rxy2gyp"}
+ {"time":"2025-06-14T19:33:22.123667201+08:00","level":"INFO","msg":"stream: started","id":"9rxy2gyp"}
+ {"time":"2025-06-14T19:33:22.123732778+08:00","level":"INFO","msg":"writer: Do: started","stream_id":"9rxy2gyp"}
+ {"time":"2025-06-14T19:33:22.123739338+08:00","level":"INFO","msg":"sender: started","stream_id":"9rxy2gyp"}
+ {"time":"2025-06-14T19:33:22.123765948+08:00","level":"INFO","msg":"handler: started","stream_id":"9rxy2gyp"}
+ {"time":"2025-06-14T19:33:22.624115243+08:00","level":"INFO","msg":"Starting system monitor"}
+ {"time":"2025-06-14T20:10:23.427993609+08:00","level":"INFO","msg":"Stopping system monitor"}
+ {"time":"2025-06-14T20:10:23.429125637+08:00","level":"INFO","msg":"Stopped system monitor"}
+ {"time":"2025-06-14T20:10:24.39935884+08:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading wandb-summary.json","runtime_seconds":0.523990922},{"desc":"uploading output.log","runtime_seconds":0.523981291},{"desc":"uploading history steps 0-0, summary, console lines 137-204","runtime_seconds":0.048062105}],"total_operations":3}}
+ {"time":"2025-06-14T20:10:26.867530799+08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-06-14T20:10:29.822474057+08:00","level":"INFO","msg":"stream: closing","id":"9rxy2gyp"}
+ {"time":"2025-06-14T20:10:29.822488409+08:00","level":"INFO","msg":"handler: closed","stream_id":"9rxy2gyp"}
+ {"time":"2025-06-14T20:10:29.822495399+08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"9rxy2gyp"}
+ {"time":"2025-06-14T20:10:29.822500164+08:00","level":"INFO","msg":"sender: closed","stream_id":"9rxy2gyp"}
+ {"time":"2025-06-14T20:10:29.824705513+08:00","level":"INFO","msg":"stream: closed","id":"9rxy2gyp"}
EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/logs/debug.log ADDED
@@ -0,0 +1,28 @@
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_setup.py:_flush():81] Current SDK version is 0.20.1
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_setup.py:_flush():81] Configure stats pid to 2538
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_setup.py:_flush():81] Loading settings from /root/.config/wandb/settings
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_setup.py:_flush():81] Loading settings from /nas/shared/kilab/wangyujia/EasyR1/examples/wandb/settings
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_setup.py:_flush():81] Loading settings from environment variables
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /nas/shared/kilab/wangyujia/EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/logs/debug.log
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /nas/shared/kilab/wangyujia/EasyR1/examples/wandb/run-20250614_193320-9rxy2gyp/logs/debug-internal.log
+ 2025-06-14 19:33:20,580 INFO MainThread:2538 [wandb_init.py:init():831] calling init triggers
+ 2025-06-14 19:33:20,581 INFO MainThread:2538 [wandb_init.py:init():836] wandb.init called with sweep_config: {}
+ config: {'data': {'train_files': '/nas/shared/kilab/wangyujia/rl_data@train', 'val_files': '/nas/shared/kilab/wangyujia/rl_data@validation', 'prompt_key': 'question', 'answer_key': 'answer', 'image_key': 'images', 'image_dir': None, 'max_prompt_length': 8192, 'max_response_length': 8192, 'rollout_batch_size': 256, 'val_batch_size': 1024, 'format_prompt': '/nas/shared/kilab/wangyujia/EasyR1/examples/format_prompt/bio_format.jinja', 'override_chat_template': None, 'shuffle': True, 'seed': 1, 'min_pixels': 262144, 'max_pixels': 4194304, 'filter_overlong_prompts': True}, 'worker': {'hybrid_engine': True, 'actor': {'strategy': 'fsdp', 'global_batch_size': 128, 'micro_batch_size_per_device_for_update': 2, 'micro_batch_size_per_device_for_experience': 64, 'max_grad_norm': 1.0, 'clip_ratio_low': 0.2, 'clip_ratio_high': 0.3, 'clip_ratio_dual': 3.0, 'ppo_epochs': 1, 'padding_free': True, 'ulysses_sequence_parallel_size': 1, 'use_torch_compile': True, 'model': {'model_path': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'tokenizer_path': '/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300', 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': False, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1610}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': True, 'offload_optimizer': True}, 'global_batch_size_per_device': -1, 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01}, 'critic': {'strategy': 'fsdp', 'global_batch_size': 256, 'micro_batch_size_per_device_for_update': 4, 'micro_batch_size_per_device_for_experience': 16, 'max_grad_norm': 1.0, 'cliprange_value': 0.5, 'ppo_epochs': 1, 'padding_free': False, 'ulysses_sequence_parallel_size': 1, 'model': {'model_path': None, 'tokenizer_path': None, 'override_config': {}, 'enable_gradient_checkpointing': True, 'trust_remote_code': True, 'freeze_vision_tower': False}, 'optim': {'lr': 1e-06, 'betas': [0.9, 0.999], 'weight_decay': 0.01, 'strategy': 'adamw', 'lr_warmup_ratio': 0.0, 'min_lr_ratio': None, 'warmup_style': 'constant', 'training_steps': 1610}, 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': False, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'global_batch_size_per_device': -1}, 'ref': {'strategy': 'fsdp', 'fsdp': {'enable_full_shard': True, 'enable_cpu_offload': True, 'enable_rank0_init': True, 'use_orig_params': False, 'torch_dtype': None, 'fsdp_size': -1, 'mp_param_dtype': 'bf16', 'mp_reduce_dtype': 'fp32', 'mp_buffer_dtype': 'fp32'}, 'offload': {'offload_params': False, 'offload_optimizer': False}, 'micro_batch_size_per_device_for_experience': 64, 'padding_free': True, 'ulysses_sequence_parallel_size': 1, 'use_torch_compile': True}, 'reward': {'reward_type': 'batch', 'reward_function': '/nas/shared/kilab/wangyujia/EasyR1/examples/reward_function/bio.py', 'reward_function_kwargs': {}, 'skip_special_tokens': True, 'num_cpus': 1, 'reward_function_name': 'compute_score'}, 'rollout': {'name': 'vllm', 'n': 8, 'temperature': 1.0, 'top_p': 0.99, 'top_k': -1, 'seed': 1, 'limit_images': 0, 'dtype': 'bf16', 'gpu_memory_utilization': 0.75, 'ignore_eos': False, 'enforce_eager': False, 'enable_chunked_prefill': False, 'tensor_parallel_size': 1, 'max_model_len': None, 'max_num_batched_tokens': 16384, 'disable_log_stats': True, 'val_override_config': {'temperature': 0.5, 'n': 1}, 'prompt_length': 8192, 'response_length': 8192, 'trust_remote_code': False}}, 'algorithm': {'gamma': 1.0, 'lam': 1.0, 'adv_estimator': 'grpo', 'disable_kl': False, 'use_kl_loss': True, 'kl_penalty': 'low_var_kl', 'kl_coef': 0.01, 'kl_type': 'fixed', 'kl_horizon': 0.0, 'kl_target': 0.0}, 'trainer': {'total_epochs': 5, 'max_steps': None, 'project_name': 'easy_r1', 'experiment_name': 'qwen2_5_bio_grpo', 'logger': ['console', 'wandb'], 'nnodes': 1, 'n_gpus_per_node': 8, 'critic_warmup': 0, 'val_freq': 5, 'val_before_train': True, 'val_only': False, 'val_generations_to_log': 3, 'save_freq': 5, 'save_limit': 3, 'save_checkpoint_path': '/oss/wangyujia/BIO/rl/qwen2.5_7b_bio', 'load_checkpoint_path': None}, '_wandb': {}}
+ 2025-06-14 19:33:20,581 INFO MainThread:2538 [wandb_init.py:init():872] starting backend
+ 2025-06-14 19:33:20,793 INFO MainThread:2538 [wandb_init.py:init():875] sending inform_init request
+ 2025-06-14 19:33:20,796 INFO MainThread:2538 [wandb_init.py:init():883] backend started and connected
+ 2025-06-14 19:33:20,799 INFO MainThread:2538 [wandb_init.py:init():956] updated telemetry
+ 2025-06-14 19:33:20,799 INFO MainThread:2538 [wandb_init.py:init():980] communicating run to backend with 90.0 second timeout
+ 2025-06-14 19:33:22,613 INFO MainThread:2538 [wandb_init.py:init():1032] starting run threads in backend
+ 2025-06-14 19:33:22,807 INFO MainThread:2538 [wandb_run.py:_console_start():2453] atexit reg
+ 2025-06-14 19:33:22,807 INFO MainThread:2538 [wandb_run.py:_redirect():2301] redirect: wrap_raw
+ 2025-06-14 19:33:22,807 INFO MainThread:2538 [wandb_run.py:_redirect():2370] Wrapping output streams.
+ 2025-06-14 19:33:22,807 INFO MainThread:2538 [wandb_run.py:_redirect():2393] Redirects installed.
+ 2025-06-14 19:33:22,810 INFO MainThread:2538 [wandb_init.py:init():1078] run started, returning control to user process
+ 2025-06-14 20:10:23,389 INFO MainThread:2538 [wandb_run.py:_finish():2219] finishing run gia0603yucca/easy_r1/9rxy2gyp
+ 2025-06-14 20:10:23,395 INFO MainThread:2538 [wandb_run.py:_atexit_cleanup():2418] got exitcode: 0
+ 2025-06-14 20:10:23,397 INFO MainThread:2538 [wandb_run.py:_restore():2400] restore
+ 2025-06-14 20:10:23,397 INFO MainThread:2538 [wandb_run.py:_restore():2406] restore done
+ 2025-06-14 20:10:29,817 INFO MainThread:2538 [wandb_run.py:_footer_history_summary_info():4000] rendering history
+ 2025-06-14 20:10:29,818 INFO MainThread:2538 [wandb_run.py:_footer_history_summary_info():4032] rendering summary
+ 2025-06-14 20:10:29,818 INFO MainThread:2538 [wandb_run.py:_footer_sync_info():3961] logging synced files
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/config.yaml ADDED
@@ -0,0 +1,230 @@
+ _wandb:
+   value:
+     cli_version: 0.20.1
+     m: []
+     python_version: 3.10.0
+     t:
+       "1":
+         - 1
+         - 11
+         - 30
+         - 41
+         - 49
+         - 51
+         - 71
+         - 95
+         - 98
+         - 105
+       "2":
+         - 1
+         - 11
+         - 30
+         - 41
+         - 49
+         - 51
+         - 71
+         - 95
+         - 98
+         - 105
+       "3":
+         - 2
+         - 13
+         - 16
+         - 55
+         - 61
+       "4": 3.10.0
+       "5": 0.20.1
+       "6": 4.52.4
+       "12": 0.20.1
+       "13": linux-x86_64
+ algorithm:
+   value:
+     adv_estimator: grpo
+     disable_kl: false
+     gamma: 1
+     kl_coef: 0.01
+     kl_horizon: 0
+     kl_penalty: low_var_kl
+     kl_target: 0
+     kl_type: fixed
+     lam: 1
+     use_kl_loss: true
+ data:
+   value:
+     answer_key: answer
+     filter_overlong_prompts: true
+     format_prompt: /nas/shared/kilab/wangyujia/EasyR1/examples/format_prompt/bio_format.jinja
+     image_dir: null
+     image_key: images
+     max_pixels: 4194304
+     max_prompt_length: 8192
+     max_response_length: 8192
+     min_pixels: 262144
+     override_chat_template: null
+     prompt_key: question
+     rollout_batch_size: 256
+     seed: 1
+     shuffle: true
+     train_files: /nas/shared/kilab/wangyujia/rl_data@train
+     val_batch_size: 1024
+     val_files: /nas/shared/kilab/wangyujia/rl_data@validation
+ trainer:
+   value:
+     critic_warmup: 0
+     experiment_name: qwen2_5_bio_grpo
+     load_checkpoint_path: null
+     logger:
+       - console
+       - wandb
+     max_steps: null
+     n_gpus_per_node: 8
+     nnodes: 1
+     project_name: easy_r1
+     save_checkpoint_path: /oss/wangyujia/BIO/rl/qwen2.5_7b_bio
+     save_freq: 5
+     save_limit: 3
+     total_epochs: 5
+     val_before_train: true
+     val_freq: 5
+     val_generations_to_log: 3
+     val_only: false
+ worker:
+   value:
+     actor:
+       clip_ratio_dual: 3
+       clip_ratio_high: 0.3
+       clip_ratio_low: 0.2
+       disable_kl: false
+       fsdp:
+         enable_cpu_offload: false
+         enable_full_shard: true
+         enable_rank0_init: true
+         fsdp_size: -1
+         mp_buffer_dtype: fp32
+         mp_param_dtype: bf16
+         mp_reduce_dtype: fp32
+         torch_dtype: null
+         use_orig_params: false
+       global_batch_size: 128
+       global_batch_size_per_device: -1
+       kl_coef: 0.01
+       kl_penalty: low_var_kl
+       max_grad_norm: 1
+       micro_batch_size_per_device_for_experience: 16
+       micro_batch_size_per_device_for_update: 2
+       model:
+         enable_gradient_checkpointing: true
+         freeze_vision_tower: false
+         model_path: /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
+         tokenizer_path: /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
+         trust_remote_code: false
+       offload:
+         offload_optimizer: true
+         offload_params: true
+       optim:
+         betas:
+           - 0.9
+           - 0.999
+         lr: 1e-06
+         lr_warmup_ratio: 0
+         min_lr_ratio: null
+         strategy: adamw
+         training_steps: 1610
+         warmup_style: constant
+         weight_decay: 0.01
+       padding_free: true
+       ppo_epochs: 1
+       strategy: fsdp
+       ulysses_sequence_parallel_size: 1
+       use_kl_loss: true
+       use_torch_compile: true
+     critic:
+       cliprange_value: 0.5
+       fsdp:
+         enable_cpu_offload: false
+         enable_full_shard: true
+         enable_rank0_init: true
+         fsdp_size: -1
+         mp_buffer_dtype: fp32
+         mp_param_dtype: bf16
+         mp_reduce_dtype: fp32
+         torch_dtype: null
+         use_orig_params: false
+       global_batch_size: 256
+       global_batch_size_per_device: -1
+       max_grad_norm: 1
+       micro_batch_size_per_device_for_experience: 16
+       micro_batch_size_per_device_for_update: 4
+       model:
+         enable_gradient_checkpointing: true
+         freeze_vision_tower: false
+         model_path: null
+         tokenizer_path: null
+         trust_remote_code: true
+       offload:
+         offload_optimizer: false
+         offload_params: false
+       optim:
+         betas:
+           - 0.9
+           - 0.999
+         lr: 1e-06
+         lr_warmup_ratio: 0
+         min_lr_ratio: null
+         strategy: adamw
+         training_steps: 1610
+         warmup_style: constant
+         weight_decay: 0.01
+       padding_free: false
+       ppo_epochs: 1
+       strategy: fsdp
+       ulysses_sequence_parallel_size: 1
+     hybrid_engine: true
+     ref:
+       fsdp:
+         enable_cpu_offload: true
+         enable_full_shard: true
+         enable_rank0_init: true
+         fsdp_size: -1
+         mp_buffer_dtype: fp32
+         mp_param_dtype: bf16
+         mp_reduce_dtype: fp32
+         torch_dtype: null
+         use_orig_params: false
+       micro_batch_size_per_device_for_experience: 16
+       offload:
+         offload_optimizer: false
+         offload_params: false
+       padding_free: true
+       strategy: fsdp
+       ulysses_sequence_parallel_size: 1
+       use_torch_compile: true
+     reward:
+       num_cpus: 1
+       reward_function: /nas/shared/kilab/wangyujia/EasyR1/examples/reward_function/bio.py
+       reward_function_name: compute_score
+       reward_type: batch
+       skip_special_tokens: true
+     rollout:
+       disable_log_stats: true
+       dtype: bf16
+       enable_chunked_prefill: false
+       enforce_eager: false
+       gpu_memory_utilization: 0.75
+       ignore_eos: false
+       limit_images: 0
+       max_model_len: null
+       max_num_batched_tokens: 16384
+       "n": 8
+       name: vllm
+       prompt_length: 8192
+       response_length: 8192
+       seed: 1
+       temperature: 1
+       tensor_parallel_size: 1
+       top_k: -1
+       top_p: 0.99
+       trust_remote_code: false
+       val_override_config:
+         "n": 1
+         temperature: 0.5
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/media/table/val/generations_0_2feffd203d182aadef48.table.json ADDED
@@ -0,0 +1 @@
+ {"columns": ["step", "input_1", "output_1", "label_1", "score_1", "input_2", "output_2", "label_2", "score_2", "input_3", "output_3", "label_3", "score_3"], "data": [[0, "system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.\nuser\nGiven the protein amino acid sequence: PNAIVNRSIPEGVDVFEISGPLFFGAASKFKDAMHVVEKAPSIRILRMRKVMSIDATGLNMLKELFNDCRKSGTTLILSGVHTQPLFAMQQYGLADEIGEENIFGNIDDALDRARSLLGLPVQGRPAGF, does the protein have the property of solubility?\nOptions:\n0. Insoluble.\n1. Soluble. You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE output within <answer> </answer> tag.\nassistant\n", "<think>\nThe protein solubility can be predicted by the grand average of hydropathicity (GRAVY) scale. The GRAVY scale is based on the assumption that a protein with a large number of hydrophilic residues will have a negative value and a protein with a large number of hydrophobic residues will have a positive value. The GRAVY scale is calculated by summing the hydropathy values of all amino acid residues in a protein sequence and dividing by the number of residues in the sequence. The hydropathy value of an amino acid residue is a measure of its hydrophobicity, which is based on its chemical properties. The hydropathy value of an amino acid residue can be calculated using a hydrophobicity scale, which is a table that assigns a numerical value to each amino acid residue based on its hydrophobicity. The amino acid sequence of the protein is: PNAIVNRSIPEGVDVFEISGPLFFGAASKFKDAMHVVEKAPSIRILRMRKVMSIDATGLNMLKELFNDCRKSGTTLILSGVHTQPLFAMQQYGLADEIGEENIFGNIDDALDRARSLLGLPVQGRPAGF. The hydropathy value of each amino acid residue in the sequence is: 0.057, 0.069, 0.08, 0.077, 0.038, 0.05, 0.065, 0.07, 0.057, 0.056, 0.08, 0.059, 0.06, 0.057, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 
0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, 0.065, 0.07, 0.05, ", 0, 0.0, "system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.\nuser\nGiven the protein amino acid sequence: MTYVLAESPEFYQDNVTDYTGQISSSDITNIQAAIDDVKASEQKVIFVVFLSSFDGVDPETWTQQALQANGGGNVLIYALAPEERQYGIQGGTQWTDAELDAANNAAFQALSQEDWAGSALALAESVGSSSSSSLEHHHHHH, does the protein have the property of solubility?\nOptions:\n0. Insoluble.\n1. Soluble. You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE output within <answer> </answer> tag.\nassistant\n", "<think>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n</think>\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. 
The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. 
Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. 
In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins are more likely to be soluble than hydrophobic proteins. Therefore, the protein is more likely to be soluble.\n<answer>\n1. Soluble.\n</answer>", 0, 0.9835715293884277, "system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.\nuser\nGiven the protein amino acid sequence: MGHHHHHHSHMDSKAIAERIKKLQAQQEHVRIGGKGTPRRKKKVIHKTAAADDKKLQSNLKKLSVTNIPGIEEVNMIKDDGTVIHFNNPKVQTSVPANTFSVTGSADNKQITEMLPGILNQLGPESLTHLKKLANNVTKLGPDGKGEDEDVPELVGDFDAASKNETKADEQ, does the protein have the property of solubility?\nOptions:\n0. Insoluble.\n1. Soluble. You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE output within <answer> </answer> tag.\nassistant\n", "<think>\nThe protein has the property of solubility. The solubility of a protein is determined by its amino acid composition and sequence. The solubility of a protein can be affected by various factors, such as pH, temperature, ionic strength, and the presence of other proteins or ligands. The solubility of a protein can be estimated by its net charge and hydrophobicity. A protein with a high net charge or a high hydrophobicity is more likely to be soluble. In this case, the protein has a net charge of 0 and a hydrophobicity of 0.494, which indicates that it is a hydrophilic protein. Hydrophilic proteins tend to be more soluble than hydrophobic proteins. 
The presence of a large number of hydrophobic amino acids, such as phenylalanine, tryptophan, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 11 hydrophobic amino acids, which is a moderate number. The presence of a large number of charged amino acids, such as arginine, lysine, and glutamic acid, can also increase the solubility of a protein. In this case, the protein contains 10 charged amino acids, which is a moderate number. The presence of a large number of proline and glycine residues can also increase the solubility of a protein. In this case, the protein contains 10 proline residues and 1 glycine residue, which is a moderate number. The presence of a large number of hydrophilic amino acids, such as serine, threonine, and tyrosine, can also increase the solubility of a protein. In this case, the protein contains 14 hydrophilic amino acids, which is a moderate number. The presence of a large number of small amino acids, such as alanine, glycine, and proline, can also increase the solubility of a protein. In this case, the protein contains 14 small amino acids, which is a moderate number. The presence of a large number of aromatic amino acids, such as phenylalanine, tyrosine, and tryptophan, can also increase the solubility of a protein. In this case, the protein contains 10 aromatic amino acids, which is a moderate number. The presence of a large number of hydrophobic amino acids, such as leucine, isoleucine, and valine, can also increase the solubility of a protein. In this case, the protein contains 10 hydrophobic amino acids, which is a moderate number.", 0, 0.0]]}
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/media/table/val/generations_104_ccf2e4d7f5a8bd440fdc.table.json ADDED
The diff for this file is too large to render. See raw diff
 
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/media/table/val/generations_109_dfde4027f4468ecb4bdb.table.json ADDED
The diff for this file is too large to render. See raw diff
 
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/requirements.txt ADDED
@@ -0,0 +1,216 @@
+ setproctitle==1.2.2
+ colorama==0.4.6
+ psutil==7.0.0
+ numpy==2.2.6
+ pylatexenc==2.10
+ pyzmq==26.4.0
+ email_validator==2.2.0
+ pyasn1==0.6.1
+ requests==2.32.4
+ omegaconf==2.3.0
+ tzdata==2025.2
+ yarl==1.20.1
+ watchfiles==1.0.5
+ nvidia-cuda-nvrtc-cu12==12.4.127
+ nvidia-cublas-cu12==12.4.5.8
+ proto-plus==1.26.1
+ wandb==0.20.1
+ scipy==1.15.3
+ googleapis-common-protos==1.70.0
+ nvidia-cufile-cu12==1.11.1.6
+ opentelemetry-exporter-otlp-proto-http==1.26.0
+ vllm==0.8.5.post1
+ sniffio==1.3.1
+ ray==2.47.0
+ python-dateutil==2.9.0.post0
+ Deprecated==1.2.18
+ setuptools==78.1.1
+ aiosignal==1.3.2
+ platformdirs==4.3.8
+ regex==2024.11.6
+ aiohappyeyeballs==2.6.1
+ uvloop==0.21.0
+ opentelemetry-semantic-conventions-ai==0.4.9
+ virtualenv==20.31.2
+ rich==14.0.0
+ accelerate==1.7.0
+ datasets==3.6.0
+ python-dotenv==1.1.0
+ antlr4-python3-runtime==4.9.3
+ gitdb==4.0.12
+ six==1.17.0
+ smmap==5.0.2
+ opencensus==0.11.4
+ annotated-types==0.7.0
+ xxhash==3.5.0
+ frozenlist==1.7.0
+ Jinja2==3.1.6
+ interegular==0.3.3
+ opentelemetry-semantic-conventions==0.47b0
+ jiter==0.10.0
+ idna==3.10
+ nvidia-cusolver-cu12==11.6.1.9
+ propcache==0.3.2
+ nest-asyncio==1.6.0
+ sentencepiece==0.2.0
+ fastapi==0.115.12
+ verl==0.3.1.dev0
+ compressed-tensors==0.9.3
+ typing-inspection==0.4.1
+ gguf==0.17.0
+ dnspython==2.7.0
+ wheel==0.45.1
+ python-multipart==0.0.20
+ cupy-cuda12x==13.4.1
+ xgrammar==0.1.18
+ starlette==0.46.2
+ peft==0.15.2
+ blake3==1.0.5
+ torchdata==0.11.0
+ qwen-vl-utils==0.0.11
+ sentry-sdk==2.30.0
+ Pygments==2.19.1
+ nvidia-nvjitlink-cu12==12.4.127
+ aiohttp-cors==0.8.1
+ outlines_core==0.1.26
+ partial-json-parser==0.2.1.1.post5
+ filelock==3.18.0
+ tensordict==0.8.3
+ cloudpickle==3.1.1
+ torchaudio==2.6.0
+ pandas==2.3.0
+ tiktoken==0.9.0
+ av==14.4.0
+ flash-attn==2.7.1.post1
+ certifi==2025.4.26
+ nvidia-nvtx-cu12==12.4.127
+ numba==0.61.2
+ networkx==3.4.2
+ sympy==1.13.1
+ pyarrow==20.0.0
+ modelscope==1.27.0
+ mpmath==1.3.0
+ codetiming==1.4.0
+ triton==3.2.0
+ referencing==0.36.2
+ dill==0.3.8
+ opencv-python-headless==4.11.0.86
+ mathruler==0.1.0
+ jsonschema-specifications==2025.4.1
+ tokenizers==0.21.1
+ huggingface-hub==0.33.0
+ rich-toolkit==0.14.7
+ fastapi-cli==0.0.7
+ python-json-logger==3.3.0
+ nvidia-cuda-cupti-cu12==12.4.127
+ httptools==0.6.4
+ mdurl==0.1.2
+ h11==0.16.0
+ distlib==0.3.9
+ airportsdata==20250523
+ transformers==4.52.4
+ opentelemetry-exporter-prometheus==0.55b1
+ fsspec==2025.3.0
+ diskcache==5.6.3
+ click==8.2.1
+ websockets==15.0.1
+ lark==1.2.2
+ uvicorn==0.34.3
+ grpcio==1.73.0
+ pillow==11.2.1
+ anyio==4.9.0
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
+ pydantic==2.11.6
+ wrapt==1.17.2
+ opentelemetry-api==1.26.0
+ nvidia-curand-cu12==10.3.5.147
+ rpds-py==0.25.1
+ exceptiongroup==1.3.0
+ msgpack==1.1.1
+ async-timeout==5.0.1
+ protobuf==4.25.8
+ httpx==0.28.1
+ opentelemetry-proto==1.26.0
+ nvidia-nccl-cu12==2.21.5
+ nvidia-cusparselt-cu12==0.6.2
+ aiohttp==3.12.12
+ urllib3==2.4.0
+ smart-open==7.1.0
+ markdown-it-py==3.0.0
+ packaging==25.0
+ charset-normalizer==3.4.2
+ py-spy==0.4.0
+ setproctitle==1.3.6
+ safetensors==0.5.3
+ pyasn1_modules==0.4.2
+ jsonschema==4.24.0
+ astor==0.8.1
+ shellingham==1.5.4
+ pytz==2025.2
+ distro==1.9.0
+ google-api-core==2.25.1
+ rsa==4.9.1
+ multiprocess==0.70.16
+ colorful==0.5.6
+ einops==0.8.1
+ lm-format-enforcer==0.10.11
+ pydantic_core==2.33.2
+ mistral_common==1.6.2
+ opentelemetry-sdk==1.26.0
+ ninja==1.11.1.4
+ typing_extensions==4.14.0
+ depyf==0.18.0
+ attrs==25.3.0
+ tqdm==4.67.1
+ openai==1.86.0
+ xformers==0.0.29.post2
+ prometheus-fastapi-instrumentator==7.1.0
+ torch==2.6.0
+ nvidia-cudnn-cu12==9.1.0.70
+ opencensus-context==0.1.3
+ importlib_metadata==8.0.0
+ orjson==3.10.18
+ psutil==7.0.0
+ liger_kernel==0.5.10
+ pycountry==24.6.1
+ zipp==3.23.0
+ pip==25.1
+ MarkupSafe==3.0.2
+ opentelemetry-exporter-otlp-proto-common==1.26.0
+ llguidance==0.7.29
+ llvmlite==0.44.0
+ multidict==6.4.4
+ nvidia-cufft-cu12==11.2.1.3
+ GitPython==3.1.44
+ fastrlock==0.8.3
+ hf-xet==1.1.3
+ PyYAML==6.0.2
+ opentelemetry-exporter-otlp==1.26.0
+ typer==0.16.0
+ msgspec==0.19.0
+ google-auth==2.40.3
+ cachetools==5.5.2
+ nvidia-cuda-runtime-cu12==12.4.127
+ torchvision==0.21.0
+ nvidia-cusparse-cu12==12.3.1.170
+ outlines==0.1.11
+ prometheus_client==0.22.1
+ httpcore==1.0.9
+ py-cpuinfo==9.0.0
+ verl==0.3.1.dev0
+ jaraco.functools==4.0.1
+ inflect==7.3.1
+ jaraco.collections==5.1.0
+ packaging==24.2
+ wheel==0.45.1
+ tomli==2.0.1
+ platformdirs==4.2.2
+ typing_extensions==4.12.2
+ more-itertools==10.3.0
+ autocommand==2.2.2
+ jaraco.text==3.12.1
+ importlib_metadata==8.0.0
+ jaraco.context==5.3.0
+ zipp==3.19.2
+ backports.tarfile==1.2.0
+ typeguard==4.3.0
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/wandb-metadata.json ADDED
@@ -0,0 +1,91 @@
+ {
+   "os": "Linux-5.10.134-008.16.kangaroo.al8.x86_64-x86_64-with-glibc2.35",
+   "python": "CPython 3.10.0",
+   "startedAt": "2025-06-14T11:47:44.287213Z",
+   "args": [
+     "--node-ip-address=10.1.5.10",
+     "--node-manager-port=45027",
+     "--object-store-name=/tmp/ray/session_2025-06-14_19-45-36_987346_61061/sockets/plasma_store",
+     "--raylet-name=/tmp/ray/session_2025-06-14_19-45-36_987346_61061/sockets/raylet",
+     "--redis-address=None",
+     "--metrics-agent-port=63241",
+     "--logging-rotate-bytes=536870912",
+     "--logging-rotate-backup-count=5",
+     "--runtime-env-agent-port=58728",
+     "--gcs-address=10.1.5.10:60836",
+     "--session-name=session_2025-06-14_19-45-36_987346_61061",
+     "--temp-dir=/tmp/ray",
+     "--webui=127.0.0.1:8265",
+     "--cluster-id=a6a80f53697854b083283d963e7ac5cff199d6477d3b4c5f6f0ddfec",
+     "--startup-token=64",
+     "--worker-launch-time-ms=1749901539376",
+     "--node-id=abb2963d276b09969a5a74d875c4ca780bcf729fc5d17badc158c62f",
+     "--runtime-env-hash=-115784934",
+     "--enable-resource-isolation=false"
+   ],
+   "program": "/root/miniconda3/envs/easyr1/lib/python3.10/site-packages/ray/_private/workers/default_worker.py",
+   "email": "gia0603yucca@gmail.com",
+   "root": "/nas/shared/kilab/wangyujia/EasyR1/examples",
+   "host": "dsw-251511-7876c679d4-fpqq8",
+   "executable": "/root/miniconda3/envs/easyr1/bin/python3",
+   "cpu_count": 64,
+   "cpu_count_logical": 64,
+   "gpu": "NVIDIA A800-SXM4-80GB",
+   "gpu_count": 8,
+   "disk": {
+     "/": {
+       "total": "1623302262784",
+       "used": "799809536"
+     }
+   },
+   "memory": {
+     "total": "549755813888"
+   },
+   "cpu": {
+     "count": 64,
+     "countLogical": 64
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-b6d61b63-2b46-d2f7-d450-38c1353782cb"
+     },
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-7a6b18cf-311a-f939-3dcc-25b0f1fc898c"
+     },
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-17cbe6f8-0339-60ce-23bb-dbd33795c1ad"
+     },
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-799578dd-bc6a-3b56-26b8-3941889669c9"
+     },
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-c783413d-e4e1-22c5-7c48-9296c28b08a0"
+     },
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-0ad82850-a679-fa6b-9200-a26edb1bb8a4"
+     },
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-e73b7d7b-4455-62ee-ec7e-a2eb1d845e07"
+     },
+     {
+       "name": "NVIDIA A800-SXM4-80GB",
+       "architecture": "Ampere",
+       "uuid": "GPU-71ee45de-57b2-ac7c-13c1-08a1f197eb20"
+     }
+   ],
+   "cudaVersion": "12.1"
+ }
EasyR1/examples/wandb/run-20250614_194744-cdibom67/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"critic/rewards/max":0.9997167587280273,"response_length/max":8192,"_runtime":145264.000918934,"timing_s/step":1183.4929371809994,"global_seqlen/max":1146817,"reward/format":0,"perf/cpu_memory_used_gb":407.5828285217285,"timing_s/old":102.70515622899984,"critic/returns/min":-2.4748668670654297,"actor/kl_loss":0.037933181738480926,"actor/pg_loss":-0.00841897885722176,"critic/advantages/min":-2.4748668670654297,"prompt_length/mean":314.70703125,"val/repeat_reward":0.0010451679389658478,"reward/overall":0.9458340392351747,"perf/max_memory_reserved_gb":75.796875,"prompt_length/clip_ratio":0,"perf/total_num_tokens":9002567,"prompt_length/max":1134,"perf/throughput":950.8471403981832,"critic/returns/max":2.3680503368377686,"actor/pg_clipfrac_higher":1.9632523688528636e-05,"val/generations":{"path":"media/table/val/generations_119_4e3f574f210c177c5437.table.json","nrows":25,"sha256":"4e3f574f210c177c54379d563cfe05fd219c32811853206cd37aa24896b5186c","ncols":13,"_type":"table-file","artifact_path":"wandb-client-artifact://76aqt3ola7mmf1zjvk79h2cs59rqhaslo8w0fqq45cu2m3evdeejs118qyegtnhx46a5806ogvbs8vt9zt1p7edrxhkmnyp31ey47kna9056mczftfqho4mygicga34i/val/generations.table.json","size":1223506,"log_mode":"IMMUTABLE","_latest_artifact_path":"wandb-client-artifact://h7flxr8fshgu5a89hy9khafst185p799hj8ttotsx8jkk8kyp037hrhnb8iekl0mne5kwynukkv20uhj63vd9zodj22j0nbm6o6w0bwwcqj2b8giffq60c52ebj5qneu:latest/val/generations.table.json"},"timing_s/gen":335.0029417749902,"critic/score/mean":0.9458340406417847,"_wandb":{"runtime":145264},"actor/pg_clipfrac_lower":0,"_step":124,"val/overall_reward":0.0010451679389658478,"critic/advantages/max":2.3680503368377686,"timing_per_token_ms/adv":0.02330009541756115,"timing_s/save_checkpoint":122.09420545600005,"reward/repeat":0.9458340392351747,"actor/ppo_kl":-3.353993864774196e-05,"critic/score/max":0.9997167587280273,"actor/lr":1e-06,"timing_per_token_ms/reward":0.00020019505992198455,"critic/rewards/mean":0.9458340406417847,"response_length/clip_ratio":0.01806640625,"critic/returns/mean":0.011055756360292435,"actor/kl_coef":0.01,"timing_per_token_ms/gen":0.04008148575558264,"perf/time_per_step":1183.4929371809994,"prompt_length/min":138,"global_seqlen/min":1109211,"global_seqlen/balanced_max":1125321,"_timestamp":1.7500469249759648e+09,"val/accuracy_reward":0,"critic/score/min":0,"timing_s/ref":98.26504057302373,"response_length/min":1542,"global_seqlen/balanced_min":1125320,"critic/rewards/min":0,"timing_s/update_actor":435.6776136950066,"perf/max_memory_allocated_gb":38.11449068784714,"perf/mfu_actor":0.42861793360235734,"response_length/mean":4081.07763671875,"reward/accuracy":0,"val/format_reward":0,"actor/entropy_loss":4.0293768817791715,"timing_s/reward":1.6732397199957632,"timing_s/adv":209.7606701029872,"timing_per_token_ms/update_actor":0.04839482046565236,"timing_per_token_ms/ref":0.01091522457683722,"timing_per_token_ms/old":0.011408430087662756,"timing_s/validation":2072.6338901569834,"val/reward_score":0.001045167911797762,"global_seqlen/minmax_diff":37606,"critic/advantages/mean":0.011055756360292435,"actor/grad_norm":0.12824900448322296,"global_seqlen/mean":1.125320875e+06}
EasyR1/verl.egg-info/PKG-INFO ADDED
@@ -0,0 +1,264 @@
+ Metadata-Version: 2.4
+ Name: verl
+ Version: 0.3.1.dev0
+ Summary: An Efficient, Scalable, Multi-Modality RL Training Framework based on veRL
+ Home-page: https://github.com/volcengine/verl
+ Author: verl
+ Author-email: zhangchi.usc1992@bytedance.com, gmsheng@connect.hku.hk, hiyouga@buaa.edu.cn
+ License: Apache 2.0 License
+ Platform: UNKNOWN
+ Requires-Python: >=3.9.0
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: accelerate
+ Requires-Dist: codetiming
+ Requires-Dist: datasets
+ Requires-Dist: liger-kernel
+ Requires-Dist: mathruler
+ Requires-Dist: numpy
+ Requires-Dist: omegaconf
+ Requires-Dist: pandas
+ Requires-Dist: peft
+ Requires-Dist: pillow
+ Requires-Dist: pyarrow>=15.0.0
+ Requires-Dist: pylatexenc
+ Requires-Dist: qwen-vl-utils
+ Requires-Dist: ray[default]
+ Requires-Dist: tensordict
+ Requires-Dist: torchdata
+ Requires-Dist: transformers>=4.51.0
+ Requires-Dist: vllm>=0.7.3
+ Requires-Dist: wandb
+ Provides-Extra: dev
+ Requires-Dist: pre-commit; extra == "dev"
+ Requires-Dist: ruff; extra == "dev"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: license-file
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework
+
+ [![GitHub Repo stars](https://img.shields.io/github/stars/hiyouga/EasyR1)](https://github.com/hiyouga/EasyR1/stargazers)
+ [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
+
+ This project is a clean fork of the original [veRL](https://github.com/volcengine/verl) project to support vision language models; we thank all the authors for providing such a high-performance RL training framework.
+
+ EasyR1 is efficient and scalable due to the design of **[HybridEngine](https://arxiv.org/abs/2409.19256)** and the latest release of **[vLLM](https://github.com/vllm-project/vllm)**'s SPMD mode.
+
+ ## Features
+
+ - Supported models
+   - Llama3/Qwen2/Qwen2.5/Qwen3 language models
+   - Qwen2/Qwen2.5-VL vision language models
+   - DeepSeek-R1 distill models
+
+ - Supported algorithms
+   - GRPO
+   - Reinforce++
+   - ReMax
+   - RLOO
+
+ - Supported datasets
+   - Any text, vision-text dataset in a [specific format](#custom-dataset)
+
+ - Supported tricks
+   - Padding-free training
+   - Resuming from checkpoint
+   - Wandb & SwanLab & Mlflow & Tensorboard tracking
+
+ ## Requirements
+
+ ### Software Requirements
+
+ - Python 3.9+
+ - transformers>=4.51.0
+ - flash-attn>=2.4.3
+ - vllm>=0.8.3
+
+ We provide a [Dockerfile](./Dockerfile) to easily build environments.
+
+ We recommend using the [pre-built docker image](https://hub.docker.com/r/hiyouga/verl) in EasyR1.
+
+ ```bash
+ docker pull hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.4-flashinfer0.2.2-cxx11abi0
+ ```
+
+ ### Hardware Requirements
+
+ \* *estimated*
+
+ | Method                   | Bits | 1.5B   | 3B     | 7B     | 32B     |
+ | ------------------------ | ---- | ------ | ------ | ------ | ------- |
+ | GRPO Full Fine-Tuning    | AMP  | 2*24GB | 4*40GB | 8*40GB | 16*80GB |
+ | GRPO Full Fine-Tuning    | BF16 | 1*24GB | 1*40GB | 4*40GB | 8*80GB  |
+
+ > [!NOTE]
+ > Use `worker.actor.fsdp.torch_dtype=bf16` and `worker.actor.optim.strategy=adamw_bf16` to enable bf16 training.
+ >
+ > We are working hard to reduce the VRAM usage in RL training; LoRA support will be integrated in the next updates.
+
+ ## Tutorial: Run Qwen2.5-VL GRPO on [Geometry3K](https://huggingface.co/datasets/hiyouga/geometry3k) Dataset in Just 3 Steps
+
+ ![image](assets/qwen2_5_vl_7b_geo.png)
+
+ ### Installation
+
+ ```bash
+ git clone https://github.com/hiyouga/EasyR1.git
+ cd EasyR1
+ pip install -e .
+ ```
+
+ ### GRPO Training
+
+ ```bash
+ bash examples/qwen2_5_vl_7b_geo3k_grpo.sh
+ ```
+
+ ### Merge Checkpoint in Hugging Face Format
+
+ ```bash
+ python3 scripts/model_merger.py --local_dir checkpoints/easy_r1/exp_name/global_step_1/actor
+ ```
+
+ > [!TIP]
+ > If you encounter issues with connecting to Hugging Face, consider using `export HF_ENDPOINT=https://hf-mirror.com`.
+ >
+ > If you want to use the SwanLab logger, consider using `bash examples/qwen2_5_vl_7b_geo3k_swanlab.sh`.
+
+ ## Custom Dataset
+
+ Please refer to the example datasets to prepare your own dataset.
+
+ - Text dataset: https://huggingface.co/datasets/hiyouga/math12k
+ - Image-text dataset: https://huggingface.co/datasets/hiyouga/geometry3k
+ - Multi-image-text dataset: https://huggingface.co/datasets/hiyouga/journeybench-multi-image-vqa
+
+ ## How to Understand GRPO in EasyR1
+
+ ![image](assets/easyr1_grpo.png)
+
+ - To learn about the GRPO algorithm, you can refer to [Hugging Face's blog](https://huggingface.co/docs/trl/v0.16.1/en/grpo_trainer).
+
+ ## How to Run 70B+ Model in Multi-node Environment
+
+ 1. Start the Ray head node.
+
+ ```bash
+ ray start --head --port=6379 --dashboard-host=0.0.0.0
+ ```
+
+ 2. Start the Ray worker node and connect to the head node.
+
+ ```bash
+ ray start --address=<head_node_ip>:6379
+ ```
+
+ 3. Check the Ray resource pool.
+
+ ```bash
+ ray status
+ ```
+
+ 4. Run the training script on the Ray head node only.
+
+ ```bash
+ bash examples/qwen2_5_vl_7b_geo3k_grpo.sh
+ ```
+
+ See the **[veRL's official doc](https://verl.readthedocs.io/en/latest/start/multinode.html)** for more details about multi-node training and the Ray debugger.
+
+ ## Other Baselines
+
+ We also reproduced the following two baselines of the [R1-V](https://github.com/deep-agent/R1-V) project.
+ - [CLEVR-70k-Counting](examples/baselines/qwen2_5_vl_3b_clevr.sh): Train the Qwen2.5-VL-3B-Instruct model on the counting problem.
+ - [GeoQA-8k](examples/baselines/qwen2_5_vl_3b_geoqa8k.sh): Train the Qwen2.5-VL-3B-Instruct model on the GeoQA problem.
+
+ ## Performance Baselines
+
+ See [baselines.md](assets/baselines.md).
+
+ ## Awesome Work using EasyR1
+
+ - **MMR1**: Advancing the Frontiers of Multimodal Reasoning. [![[code]](https://img.shields.io/github/stars/LengSicong/MMR1)](https://github.com/LengSicong/MMR1)
+ - **Vision-R1**: Incentivizing Reasoning Capability in Multimodal Large Language Models. [![[code]](https://img.shields.io/github/stars/Osilly/Vision-R1)](https://github.com/Osilly/Vision-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06749-blue)](https://arxiv.org/abs/2503.06749)
+ - **Seg-Zero**: Reasoning-Chain Guided Segmentation via Cognitive Reinforcement. [![[code]](https://img.shields.io/github/stars/dvlab-research/Seg-Zero)](https://github.com/dvlab-research/Seg-Zero) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.06520-blue)](https://arxiv.org/abs/2503.06520)
+ - **MetaSpatial**: Reinforcing 3D Spatial Reasoning in VLMs for the Metaverse. [![[code]](https://img.shields.io/github/stars/PzySeere/MetaSpatial)](https://github.com/PzySeere/MetaSpatial) [![[arxiv]](https://img.shields.io/badge/arxiv-2503.18470-blue)](https://arxiv.org/abs/2503.18470)
+ - **Temporal-R1**: Envolving Temporal Reasoning Capability into LMMs via Temporal Consistent Reward. [![[code]](https://img.shields.io/github/stars/appletea233/Temporal-R1)](https://github.com/appletea233/Temporal-R1)
+ - **NoisyRollout**: Reinforcing Visual Reasoning with Data Augmentation. [![[code]](https://img.shields.io/github/stars/John-AI-Lab/NoisyRollout)](https://github.com/John-AI-Lab/NoisyRollout) [![[arxiv]](https://img.shields.io/badge/arxiv-2504.13055-blue)](https://arxiv.org/pdf/2504.13055)
+ - **GUI-R1**: A Generalist R1-Style Vision-Language Action Model For GUI Agents. [![[code]](https://img.shields.io/github/stars/ritzz-ai/GUI-R1)](https://github.com/ritzz-ai/GUI-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2504.10458-blue)](https://arxiv.org/abs/2504.10458)
+ - **R1-Track**: Direct Application of MLLMs to Visual Object Tracking via Reinforcement Learning. [![[code]](https://img.shields.io/github/stars/Wangbiao2/R1-Track)](https://github.com/Wangbiao2/R1-Track)
+ - **VisionReasoner**: Unified Visual Perception and Reasoning via Reinforcement Learning. [![[code]](https://img.shields.io/github/stars/dvlab-research/VisionReasoner)](https://github.com/dvlab-research/VisionReasoner) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.12081-blue)](https://arxiv.org/abs/2505.12081)
+ - **MM-UPT**: Unsupervised Post-Training for Multi-Modal LLM Reasoning via GRPO. [![[code]](https://img.shields.io/github/stars/waltonfuture/MM-UPT)](https://github.com/waltonfuture/MM-UPT) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.22453-blue)](https://arxiv.org/pdf/2505.22453)
+ - **RL-with-Cold-Start**: Advancing Multimodal Reasoning via Reinforcement Learning with Cold Start. [![[code]](https://img.shields.io/github/stars/waltonfuture/RL-with-Cold-Start)](https://github.com/waltonfuture/RL-with-Cold-Start) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.22334-blue)](https://arxiv.org/pdf/2505.22334)
+ - **ViGoRL**: Grounded Reinforcement Learning for Visual Reasoning. [![[code]](https://img.shields.io/github/stars/Gabesarch/grounded-rl)](https://github.com/Gabesarch/grounded-rl) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.22334-blue)](https://arxiv.org/abs/2505.23678)
+ - **Revisual-R1**: Advancing Multimodal Reasoning: From Optimized Cold Start to Staged Reinforcement Learning. [![[code]](https://img.shields.io/github/stars/CSfufu/Revisual-R1)](https://github.com/CSfufu/Revisual-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2506.04207-blue)](https://arxiv.org/abs/2506.04207)
+ - **SophiaVL-R1**: Reinforcing MLLMs Reasoning with Thinking Reward. [![[code]](https://img.shields.io/github/stars/kxfan2002/SophiaVL-R1)](https://github.com/kxfan2002/SophiaVL-R1) [![[arxiv]](https://img.shields.io/badge/arxiv-2505.17018-blue)](https://arxiv.org/abs/2505.17018)
+ - **Vision-Matters**: Simple Visual Perturbations Can Boost Multimodal Math Reasoning. [![[code]](https://img.shields.io/github/stars/YutingLi0606/Vision-Matters)](https://github.com/YutingLi0606/Vision-Matters) [![[arxiv]](https://img.shields.io/badge/arxiv-2506.09736-blue)](https://arxiv.org/abs/2506.09736)
+
+ ## TODO
+
+ - Support LoRA (high priority).
+ - Support ulysses parallelism for VLMs (middle priority).
+ - Support more VLM architectures.
+
+ > [!NOTE]
+ > We will not provide scripts for supervised fine-tuning and inference in this project. If you have such requirements, we recommend using [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory).
+
+ ### Known bugs
+
+ These features are temporarily disabled for now; we plan to fix them one by one in future updates.
+
+ - Vision language models are not compatible with ulysses parallelism yet.
+
+ ## Discussion Group
+
+ 👋 Join our [WeChat group](assets/wechat.jpg).
+
+ ## FAQs
+
+ > ValueError: Image features and image tokens do not match: tokens: 8192, features 9800
+
+ Increase the `data.max_prompt_length` or reduce the `data.max_pixels`.
+
+ > RuntimeError: CUDA Error: out of memory at /workspace/csrc/cumem_allocator.cpp:62
+
+ Reduce the `worker.rollout.gpu_memory_utilization` and enable `worker.actor.offload.offload_params`.
+
+ > RuntimeError: 0 active drivers ([]). There should only be one.
+
+ Uninstall `deepspeed` from the current python environment.
+
+ ## Citation
+
+ Core contributors: [Yaowei Zheng](https://github.com/hiyouga), [Junting Lu](https://github.com/AL-377), [Shenzhi Wang](https://github.com/Shenzhi-Wang), [Zhangchi Feng](https://github.com/BUAADreamer), [Dongdong Kuang](https://github.com/Kuangdd01) and Yuwen Xiong
+
+ We also thank Guangming Sheng and Chi Zhang for helpful discussions.
+
+ ```bibtex
+ @misc{zheng2025easyr1,
+   title = {EasyR1: An Efficient, Scalable, Multi-Modality RL Training Framework},
+   author = {Yaowei Zheng, Junting Lu, Shenzhi Wang, Zhangchi Feng, Dongdong Kuang, Yuwen Xiong},
+   howpublished = {\url{https://github.com/hiyouga/EasyR1}},
+   year = {2025}
+ }
+ ```
+
+ We also recommend citing the original work.
+
+ ```bibtex
+ @article{sheng2024hybridflow,
+   title = {HybridFlow: A Flexible and Efficient RLHF Framework},
+   author = {Guangming Sheng and Chi Zhang and Zilingfeng Ye and Xibin Wu and Wang Zhang and Ru Zhang and Yanghua Peng and Haibin Lin and Chuan Wu},
+   year = {2024},
+   journal = {arXiv preprint arXiv: 2409.19256}
+ }
+ ```
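As a quick aside on the GRPO algorithm the README above points to: the core idea is to sample a group of responses per prompt, score each one, and use the group-normalized reward as the advantage. Below is a minimal illustrative sketch of that normalization step only; it is not EasyR1's actual implementation (which lives in `verl/trainer/core_algos.py`), and the function name and tensor shapes are chosen here for illustration.

```python
# Sketch of GRPO's group-relative advantage: normalize rewards within each
# group of responses sampled for the same prompt.
import torch

def grpo_advantages(rewards: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """rewards: (num_prompts, group_size) scalar reward per sampled response."""
    mean = rewards.mean(dim=-1, keepdim=True)
    std = rewards.std(dim=-1, keepdim=True)
    return (rewards - mean) / (std + eps)

rewards = torch.tensor([[0.0, 1.0, 1.0, 0.0],   # mixed group: nonzero advantages
                        [1.0, 1.0, 1.0, 1.0]])  # all-equal group: ~zero advantage
print(grpo_advantages(rewards))
```

Note how a group where every response gets the same reward yields (near-)zero advantages, which is why reward functions that discriminate within a group matter for GRPO.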
EasyR1/verl.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,71 @@
+ LICENSE
+ README.md
+ pyproject.toml
+ setup.py
+ ./verl/__init__.py
+ ./verl/protocol.py
+ ./verl/models/__init__.py
+ ./verl/models/monkey_patch.py
+ ./verl/models/transformers/__init__.py
+ ./verl/models/transformers/flash_attention_utils.py
+ ./verl/models/transformers/qwen2_vl.py
+ ./verl/single_controller/__init__.py
+ ./verl/single_controller/base/__init__.py
+ ./verl/single_controller/base/decorator.py
+ ./verl/single_controller/base/worker.py
+ ./verl/single_controller/base/worker_group.py
+ ./verl/single_controller/base/register_center/__init__.py
+ ./verl/single_controller/base/register_center/ray.py
+ ./verl/single_controller/ray/__init__.py
+ ./verl/single_controller/ray/base.py
+ ./verl/trainer/__init__.py
+ ./verl/trainer/config.py
+ ./verl/trainer/core_algos.py
+ ./verl/trainer/data_loader.py
+ ./verl/trainer/main.py
+ ./verl/trainer/metrics.py
+ ./verl/trainer/ray_trainer.py
+ ./verl/utils/__init__.py
+ ./verl/utils/dataset.py
+ ./verl/utils/flops_counter.py
+ ./verl/utils/fsdp_utils.py
+ ./verl/utils/model_utils.py
+ ./verl/utils/py_functional.py
+ ./verl/utils/seqlen_balancing.py
+ ./verl/utils/tokenizer.py
+ ./verl/utils/torch_dtypes.py
+ ./verl/utils/torch_functional.py
+ ./verl/utils/ulysses.py
+ ./verl/utils/checkpoint/__init__.py
+ ./verl/utils/checkpoint/checkpoint_manager.py
+ ./verl/utils/checkpoint/fsdp_checkpoint_manager.py
+ ./verl/utils/logger/__init__.py
+ ./verl/utils/logger/gen_logger.py
+ ./verl/utils/logger/logger.py
+ ./verl/workers/__init__.py
+ ./verl/workers/config.py
+ ./verl/workers/fsdp_workers.py
+ ./verl/workers/actor/__init__.py
+ ./verl/workers/actor/base.py
+ ./verl/workers/actor/config.py
+ ./verl/workers/actor/dp_actor.py
+ ./verl/workers/critic/__init__.py
+ ./verl/workers/critic/base.py
+ ./verl/workers/critic/config.py
+ ./verl/workers/critic/dp_critic.py
+ ./verl/workers/reward/__init__.py
+ ./verl/workers/reward/config.py
+ ./verl/workers/reward/function.py
+ ./verl/workers/rollout/__init__.py
+ ./verl/workers/rollout/base.py
+ ./verl/workers/rollout/config.py
+ ./verl/workers/rollout/vllm_rollout_spmd.py
+ ./verl/workers/sharding_manager/__init__.py
+ ./verl/workers/sharding_manager/base.py
+ ./verl/workers/sharding_manager/fsdp_ulysses.py
+ ./verl/workers/sharding_manager/fsdp_vllm.py
+ verl.egg-info/PKG-INFO
+ verl.egg-info/SOURCES.txt
+ verl.egg-info/dependency_links.txt
+ verl.egg-info/requires.txt
+ verl.egg-info/top_level.txt
EasyR1/verl.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
EasyR1/verl.egg-info/requires.txt ADDED
@@ -0,0 +1,23 @@
+ accelerate
+ codetiming
+ datasets
+ liger-kernel
+ mathruler
+ numpy
+ omegaconf
+ pandas
+ peft
+ pillow
+ pyarrow>=15.0.0
+ pylatexenc
+ qwen-vl-utils
+ ray[default]
+ tensordict
+ torchdata
+ transformers>=4.51.0
+ vllm>=0.7.3
+ wandb
+
+ [dev]
+ pre-commit
+ ruff
EasyR1/verl.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ verl
EasyR1/verl/workers/sharding_manager/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (373 Bytes). View file
 
EasyR1/verl/workers/sharding_manager/__pycache__/base.cpython-310.pyc ADDED
Binary file (1.05 kB). View file
 
EasyR1/verl/workers/sharding_manager/__pycache__/fsdp_ulysses.cpython-310.pyc ADDED
Binary file (2.4 kB). View file
 
EasyR1/verl/workers/sharding_manager/__pycache__/fsdp_vllm.cpython-310.pyc ADDED
Binary file (5 kB). View file
 
EasyR1/verl/workers/sharding_manager/fsdp_ulysses.py ADDED
@@ -0,0 +1,65 @@
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """
+ Contains a resharding manager that binds weights from FSDP zero3 to XPerfGPT
+ """
+
+ from torch.distributed.device_mesh import DeviceMesh
+
+ from ...protocol import DataProto, all_gather_data_proto
+ from ...utils.ulysses import get_ulysses_sequence_parallel_group, set_ulysses_sequence_parallel_group
+ from .base import BaseShardingManager
+
+
+ class FSDPUlyssesShardingManager(BaseShardingManager):
+     """
+     Sharding manager to support data resharding when using FSDP + Ulysses
+     """
+
+     def __init__(self, device_mesh: DeviceMesh):
+         super().__init__()
+         self.device_mesh = device_mesh
+
+     def __enter__(self):
+         if self.device_mesh is not None:
+             self.prev_sp_group = get_ulysses_sequence_parallel_group()
+             set_ulysses_sequence_parallel_group(self.device_mesh["sp"].get_group())
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         if self.device_mesh is not None:
+             set_ulysses_sequence_parallel_group(self.prev_sp_group)
+
+     def preprocess_data(self, data: DataProto) -> DataProto:
+         """
+         AllGather data from sp region
+         This is because the data is first sharded along the FSDP dimension as we utilize the DP_COMPUTE
+         In Ulysses, we need to make sure the same data is used across a SP group
+         """
+         if self.device_mesh is not None:
+             sp_size = self.device_mesh["sp"].size()
+             sp_group = self.device_mesh["sp"].get_group()
+             all_gather_data_proto(data, size=sp_size, group=sp_group)
+
+         return data
+
+     def postprocess_data(self, data: DataProto) -> DataProto:
+         """
+         Split the data to follow FSDP partition
+         """
+         if self.device_mesh is not None:
+             sp_size = self.device_mesh["sp"].size()
+             sp_rank = self.device_mesh["sp"].get_local_rank()
+             data = data.chunk(chunks=sp_size)[sp_rank]
+
+         return data
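To make the data flow of this manager concrete, here is a hypothetical usage sketch: inside the context, the Ulysses sequence-parallel group is taken from the mesh's `"sp"` sub-mesh, so `preprocess_data`/`postprocess_data` gather and re-chunk batches around a sequence-parallel compute step. The mesh shape, `batch`, and `compute_log_probs` are illustrative placeholders, not taken from a specific EasyR1 script.

```python
# Hypothetical usage of FSDPUlyssesShardingManager with a (dp, sp) mesh.
from torch.distributed.device_mesh import init_device_mesh

device_mesh = init_device_mesh("cuda", mesh_shape=(4, 2), mesh_dim_names=("dp", "sp"))
manager = FSDPUlyssesShardingManager(device_mesh)

with manager:  # __enter__ swaps in the "sp" process group
    batch = manager.preprocess_data(batch)      # all-gather so every sp rank sees the same data
    output = compute_log_probs(batch)           # hypothetical sequence-parallel compute step
    output = manager.postprocess_data(output)   # keep only this rank's FSDP chunk
# __exit__ restores the previous sequence-parallel group
```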
EasyR1/verl/workers/sharding_manager/fsdp_vllm.py ADDED
@@ -0,0 +1,149 @@
+ # Copyright 2024 Bytedance Ltd. and/or its affiliates
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import inspect
+ import re
+ from typing import Dict, Iterable, Tuple, Union
+
+ import torch
+ import torch.distributed as dist
+ from torch.distributed._tensor import DTensor
+ from torch.distributed.checkpoint.state_dict import get_model_state_dict
+ from torch.distributed.device_mesh import DeviceMesh
+ from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP
+ from transformers import PreTrainedModel
+ from vllm import LLM
+ from vllm.distributed import parallel_state as vllm_ps
+
+ from ...protocol import DataProto, all_gather_data_proto
+ from ...utils.model_utils import print_gpu_memory_usage
+ from .base import BaseShardingManager
+
+
+ class FSDPVLLMShardingManager(BaseShardingManager):
+     def __init__(
+         self,
+         module: FSDP,
+         inference_engine: LLM,
+         device_mesh: DeviceMesh,
+     ):
+         self.module = module
+         self.inference_engine = inference_engine
+         self.device_mesh = device_mesh
+
+         self.world_size = dist.get_world_size()
+         self.tp_size = vllm_ps.get_tensor_model_parallel_world_size()
+         self.tp_rank = vllm_ps.get_tensor_model_parallel_rank()
+         self.tp_group = vllm_ps.get_tensor_model_parallel_group().device_group
+
+         # Record freed bytes to estimate memory usage correctly
+         # https://github.com/vllm-project/vllm/pull/11743#issuecomment-2754338119
+         self.freed_bytes = 0
+
+         # Note that torch_random_states may be different on each dp rank
+         self.torch_random_states = torch.cuda.get_rng_state()
+         # get a random rng states
+         gen_dp_rank = self.device_mesh["dp"].get_local_rank()
+         torch.cuda.manual_seed(gen_dp_rank + 1000)  # make sure all tp ranks have the same random states
+         self.gen_random_states = torch.cuda.get_rng_state()
+         torch.cuda.set_rng_state(self.torch_random_states)
+
+     def _rename_weight_keys(self, actor_weights: Dict[str, Union[torch.Tensor, DTensor]], model: PreTrainedModel):
+         # convert state dict keys: https://github.com/huggingface/transformers/pull/38385
+         if not hasattr(model, "_checkpoint_conversion_mapping"):
+             return actor_weights
+
+         reverse_key_mapping = {v: k for k, v in model._checkpoint_conversion_mapping.items()}
+         original_weights = {}
+         for key, value in actor_weights.items():
+             for pattern, replacement in reverse_key_mapping.items():
+                 replacement = replacement.lstrip("^")  # strip off un-needed chars and patterns
+                 replacement = re.sub(r"\(.*\)", "", replacement)
+                 key, n_replace = re.subn(pattern, replacement, key)
+                 # Early exit of the loop
+                 if n_replace > 0:
+                     break
+
+             original_weights[key] = value
+
+         return original_weights
+
+     def _make_weight_iterator(
+         self, actor_weights: Dict[str, Union[torch.Tensor, DTensor]]
+     ) -> Iterable[Tuple[str, torch.Tensor]]:
+         for name, tensor in actor_weights.items():
+             yield name, tensor.full_tensor() if self.world_size != 1 else tensor
+
+     def __enter__(self):
+         # NOTE: Basically, we only need `torch.cuda.empty_cache()` before vllm wake_up and
+         # after vllm sleep, since vllm has its own caching memory allocator CuMemAllocator.
+         # Out of vllm scope, we should avoid empty cache to let pytorch using caching memory
+         # to speed up memory allocations.
+         #
+         # pytorch: https://pytorch.org/docs/stable/notes/cuda.html#memory-management
+         # vllm: https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/device_allocator/cumem.py#L103
+         torch.cuda.empty_cache()
+         print_gpu_memory_usage("Before state_dict() in sharding manager")
+         actor_weights = get_model_state_dict(self.module)
+         actor_weights = self._rename_weight_keys(actor_weights, self.module._fsdp_wrapped_module)
+         print_gpu_memory_usage("After state_dict() in sharding manager")
+
+         if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+             self.inference_engine.wake_up(tags=["weights"])
+         else:
+             self.inference_engine.wake_up()
+
+         model = self.inference_engine.llm_engine.model_executor.driver_worker.worker.model_runner.model
+         model.load_weights(self._make_weight_iterator(actor_weights))
+         print_gpu_memory_usage("After sync model weights in sharding manager")
+
+         del actor_weights
+         torch.cuda.empty_cache()
+
+         if "tags" in inspect.signature(self.inference_engine.wake_up).parameters:
+             self.inference_engine.wake_up(tags=["kv_cache"])
+
+         print_gpu_memory_usage("After del state_dict and empty_cache in sharding manager")
+         # important: need to manually set the random states of each tp to be identical.
+         if self.device_mesh is not None:
+             self.torch_random_states = torch.cuda.get_rng_state()
+             torch.cuda.set_rng_state(self.gen_random_states)
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         print_gpu_memory_usage("Before vllm offload in sharding manager")
+         free_bytes_before_sleep = torch.cuda.mem_get_info()[0]
+         self.inference_engine.sleep(level=1)
+         free_bytes_after_sleep = torch.cuda.mem_get_info()[0]
+         self.freed_bytes = free_bytes_after_sleep - free_bytes_before_sleep
+         print_gpu_memory_usage("After vllm offload in sharding manager")
+
+         self.module.train()
+         torch.cuda.empty_cache()  # add empty cache after each compute
+
+         # restore random states
+         if self.device_mesh is not None:
+             self.gen_random_states = torch.cuda.get_rng_state()
+             torch.cuda.set_rng_state(self.torch_random_states)
+
+     def preprocess_data(self, data: DataProto) -> DataProto:
+         """All gather across tp group to make each rank has identical input."""
+         all_gather_data_proto(data, size=self.tp_size, group=self.tp_group)
+         return data
+
+     def postprocess_data(self, data: DataProto) -> DataProto:
+         """Get chunk data of this tp rank since we do all gather in preprocess."""
+         if self.tp_size > 1:
+             data = data.chunk(chunks=self.tp_size)[self.tp_rank]
+
+         return data
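A hypothetical usage sketch of the manager above, to make the enter/exit flow concrete: entering the context copies the FSDP state dict into the vLLM engine and wakes it up, and exiting puts vLLM back to sleep and restores the RNG state. Here `actor_module`, `llm`, `rollout_mesh`, `batch`, and `prompts` are placeholders for objects an EasyR1 worker would already hold; this is not a complete worker.

```python
# Hypothetical rollout step wrapped in the FSDP->vLLM weight-sync context.
manager = FSDPVLLMShardingManager(
    module=actor_module,      # FSDP-wrapped policy model
    inference_engine=llm,     # vllm.LLM instance (created with enable_sleep_mode=True)
    device_mesh=rollout_mesh, # mesh with a "dp" dimension, used for RNG seeding
)

with manager:                                   # __enter__: sync weights, wake_up()
    batch = manager.preprocess_data(batch)      # all-gather across tp ranks
    sequences = llm.generate(prompts)           # rollout with the freshly synced weights
    batch = manager.postprocess_data(batch)     # take this tp rank's chunk back
# __exit__: llm.sleep(level=1), module back to train mode, RNG restored
```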
LAVIS-main/lavis/models/alpro_models/alpro_retrieval.py ADDED
@@ -0,0 +1,422 @@
1
+ """
2
+ Copyright (c) 2022, salesforce.com, inc.
3
+ All rights reserved.
4
+ SPDX-License-Identifier: BSD-3-Clause
5
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6
+ """
7
+
8
+ import datetime
9
+ import logging
10
+ import time
11
+
12
+ import lavis.common.dist_utils as dist_utils
13
+ import numpy as np
14
+ import torch
15
+ import torch.distributed as dist
16
+ import torch.nn.functional as F
17
+ from lavis.common.config import node_to_dict
18
+ from lavis.common.dist_utils import get_rank
19
+ from lavis.common.logger import MetricLogger
20
+ from lavis.common.registry import registry
21
+ from lavis.models.alpro_models import AlproBase
22
+ from lavis.models.alpro_models.alpro_outputs import AlproIntermediateOutput, AlproOutput
23
+ from lavis.models.base_model import all_gather_with_grad
24
+ from lavis.models.med import XBertEncoder
25
+ from lavis.models.timesformer.vit import TimeSformer
26
+ from torch import nn
27
+
28
+
29
+ @registry.register_model("alpro_retrieval")
30
+ class AlproRetrieval(AlproBase):
31
+ PRETRAINED_MODEL_CONFIG_DICT = {
32
+ "msrvtt": "configs/models/alpro_retrieval_msrvtt.yaml",
33
+ "didemo": "configs/models/alpro_retrieval_didemo.yaml",
34
+ }
35
+
36
+ def __init__(
37
+ self,
38
+ visual_encoder,
39
+ text_encoder,
40
+ vision_width=768,
41
+ text_width=768,
42
+ embed_dim=256,
43
+ max_txt_len=35,
44
+ temp=0.07,
45
+ ):
46
+ super().__init__()
47
+
48
+ self.temp = nn.Parameter(torch.ones([]) * temp)
49
+
50
+ self.tokenizer = self.init_tokenizer()
51
+
52
+ self.visual_encoder = visual_encoder
53
+ self.text_encoder = text_encoder
54
+
55
+ vision_width = vision_width
56
+ text_width = text_width
57
+
58
+ self.vision_proj = nn.Linear(vision_width, embed_dim)
59
+ self.text_proj = nn.Linear(text_width, embed_dim)
60
+
61
+ self.itm_head = nn.Linear(text_width, 2)
62
+
63
+ self.max_txt_len = max_txt_len
64
+
65
+ def forward(self, samples):
66
+ with torch.no_grad():
67
+ self.temp.clamp_(0.001, 0.5)
68
+
69
+ visual_inputs = samples["video"]
70
+ caption = samples["text_input"]
71
+
72
+ b, t, c, h, w = visual_inputs.shape
73
+
74
+ # forward text
75
+ text = self.tokenizer(
76
+ caption,
77
+ padding="max_length",
78
+ truncation=True,
79
+ max_length=self.max_txt_len,
80
+ return_tensors="pt",
81
+ ).to(self.device)
82
+
83
+ text_output = self.text_encoder.forward_text(
84
+ text,
85
+ token_type_ids=torch.zeros(
86
+ text.input_ids.shape, dtype=torch.long, device=self.device
87
+ ),
88
+ )
89
+ text_embeds = text_output.last_hidden_state
90
+ text_feat = F.normalize(self.text_proj(text_embeds[:, 0, :]), dim=-1)
91
+
92
+ # forward visual
93
+ # timeSformer asks for (b, c, t, h, w) as input.
94
+ video_embeds = self.visual_encoder.forward_features(visual_inputs)
95
+ video_feat = F.normalize(self.vision_proj(video_embeds[:, 0, :]), dim=-1)
96
+ video_atts = torch.ones(video_embeds.size()[:-1], dtype=torch.long).to(
97
+ self.device
98
+ )
99
+
100
+ # ========== (in-batch) ITC loss ==========
101
+ gathered_video_feats = all_gather_with_grad(video_feat)
102
+ gathered_text_feats = all_gather_with_grad(text_feat)
103
+
104
+ sim_v2t = video_feat @ gathered_text_feats.t() / self.temp
105
+ sim_t2v = text_feat @ gathered_video_feats.t() / self.temp
106
+
107
+ sim_targets = torch.zeros_like(sim_v2t)
108
+
109
+ local_rank = get_rank()
110
+ b_start, b_end = b * local_rank, b * (local_rank + 1)
111
+ sim_targets[:, b_start:b_end] = torch.eye(b)
112
+
113
+ loss_v2t = -torch.sum(F.log_softmax(sim_v2t, dim=1) * sim_targets, dim=1).mean()
114
+ loss_t2v = -torch.sum(F.log_softmax(sim_t2v, dim=1) * sim_targets, dim=1).mean()
115
+
116
+ vtc_loss = (loss_v2t + loss_t2v) / 2
117
+
118
+ (
119
+ vtm_loss,
120
+ vtm_logits,
121
+ vtm_labels,
122
+ encoder_output,
123
+ encoder_output_neg,
124
+ ) = self.compute_vtm(
125
+ text_embeds=text_embeds,
126
+ text_atts=text.attention_mask,
127
+ image_embeds=video_embeds,
128
+ image_atts=video_atts,
129
+ sim_i2t=sim_v2t.clone(), # for hard mining
130
+ sim_t2i=sim_t2v.clone(), # for hard mining
131
+ )
132
+
133
+ loss = vtc_loss + vtm_loss
134
+
135
+ # return {"loss": loss}
136
+ return AlproOutput(
137
+ loss=loss,
138
+ loss_vtc=vtc_loss,
139
+ loss_vtm=vtm_loss,
140
+ intermediate_output=AlproIntermediateOutput(
141
+ video_embeds=video_embeds,
142
+ text_embeds=text_embeds,
143
+ encoder_output=encoder_output,
144
+ encoder_output_neg=encoder_output_neg,
145
+ vtm_logits=vtm_logits,
146
+ vtm_labels=vtm_labels,
147
+ ),
148
+ )
149
+
150
+ def compute_vtm(
151
+ self, text_embeds, text_atts, image_embeds, image_atts, sim_i2t, sim_t2i
152
+ ):
153
+ device = self.device
154
+
155
+ # ====== positive pairs =======
156
+ attention_mask = torch.cat([text_atts, image_atts], dim=1)
157
+ embedding_output_pos = torch.cat([text_embeds, image_embeds], dim=1)
158
+
159
+ encoder_outputs_pos = self.text_encoder(
160
+ encoder_embeds=embedding_output_pos,
161
+ attention_mask=attention_mask,
162
+ return_dict=True,
163
+ mode="fusion",
164
+ )
165
+
166
+ # ====== negative pairs =======
167
+ bs = text_embeds.shape[0]
168
+
169
+ local_rank = get_rank()
170
+ b_start, b_end = bs * local_rank, bs * (local_rank + 1)
171
+
172
+ with torch.no_grad():
173
+ weights_v2t = sim_i2t[:, b_start:b_end]
174
+ weights_t2v = sim_t2i[:, b_start:b_end]
175
+
176
+ # never select self as negative
177
+ weights_v2t.fill_diagonal_(-np.Inf)
178
+ weights_t2v.fill_diagonal_(-np.Inf)
179
+
180
+ weights_v2t = F.softmax(weights_v2t, dim=1)
181
+ weights_t2v = F.softmax(weights_t2v, dim=1)
182
+
183
+ # select a negative image for each text
184
+ # FIXME to optimize using indexing operations
185
+ image_embeds_neg = []
186
+ for b in range(bs):
187
+ neg_idx = torch.multinomial(weights_t2v[b], 1).item()
188
+ image_embeds_neg.append(image_embeds[neg_idx])
189
+ image_embeds_neg = torch.stack(image_embeds_neg, dim=0)
190
+
191
+ # select a negative text for each image
192
+ text_embeds_neg = []
193
+ text_atts_neg = []
194
+ for b in range(bs):
195
+ neg_idx = torch.multinomial(weights_v2t[b], 1).item()
196
+ text_embeds_neg.append(text_embeds[neg_idx])
197
+ text_atts_neg.append(text_atts[neg_idx])
198
+
199
+ text_embeds_neg = torch.stack(text_embeds_neg, dim=0)
200
+ text_atts_neg = torch.stack(text_atts_neg, dim=0)
201
+
202
+ text_embeds_all = torch.cat([text_embeds, text_embeds_neg], dim=0)
203
+ text_atts_all = torch.cat([text_atts, text_atts_neg], dim=0)
204
+
205
+ video_embeds_all = torch.cat([image_embeds_neg, image_embeds], dim=0)
206
+ video_atts_all = torch.cat([image_atts, image_atts], dim=0)
207
+
208
+ attention_mask_all = torch.cat([text_atts_all, video_atts_all], dim=1)
209
+ embedding_output_all = torch.cat([text_embeds_all, video_embeds_all], dim=1)
210
+
211
+ # forward negative pairs via cross encoder
212
+ encoder_outputs_neg = self.text_encoder(
213
+ encoder_embeds=embedding_output_all,
214
+ attention_mask=attention_mask_all,
215
+ return_dict=True,
216
+ mode="fusion",
217
+ )
218
+
219
+ vl_embeddings = torch.cat(
220
+ [
221
+ encoder_outputs_pos.last_hidden_state[:, 0, :],
222
+ encoder_outputs_neg.last_hidden_state[:, 0, :],
223
+ ],
224
+ dim=0,
225
+ )
226
+ vtm_logits = self.itm_head(vl_embeddings)
227
+
228
+ vtm_labels = torch.cat(
229
+ [torch.ones(bs, dtype=torch.long), torch.zeros(2 * bs, dtype=torch.long)],
230
+ dim=0,
231
+ ).to(device)
232
+ vtm_loss = F.cross_entropy(vtm_logits, vtm_labels)
233
+
234
+ return (
235
+ vtm_loss,
236
+ vtm_logits,
237
+ vtm_labels,
238
+ encoder_outputs_pos,
239
+ encoder_outputs_neg,
240
+ )
241
+
242
+ def compute_sim_matrix(self, data_loader, task_cfg):
243
+ k_test = task_cfg.get("k_test")
244
+
245
+ metric_logger = MetricLogger(delimiter=" ")
246
+ header = "Evaluation:"
247
+
248
+ logging.info("Computing features for evaluation...")
249
+ start_time = time.time()
250
+
251
+ texts = data_loader.dataset.text
252
+ num_text = len(texts)
253
+ text_bs = 256
254
+ text_ids = []
255
+ text_embeds = []
256
+ text_feats = []
257
+ text_atts = []
258
+ for i in range(0, num_text, text_bs):
259
+ text = texts[i : min(num_text, i + text_bs)]
260
+ text_input = self.tokenizer(
261
+ text,
262
+ padding="max_length",
263
+ truncation=True,
264
+ max_length=self.max_txt_len,
265
+ return_tensors="pt",
266
+ ).to(self.device)
267
+ text_output = self.text_encoder.forward_text(
268
+ text_input,
269
+ token_type_ids=torch.zeros(
270
+ text_input.input_ids.shape, dtype=torch.long, device=self.device
271
+ ),
272
+ )
273
+ text_feats.append(text_output.last_hidden_state.cpu())
274
+ text_embed = F.normalize(
275
+ self.text_proj(text_output.last_hidden_state[:, 0, :])
276
+ )
277
+ text_embeds.append(text_embed)
278
+ text_ids.append(text_input.input_ids)
279
+ text_atts.append(text_input.attention_mask)
280
+
281
+ text_embeds = torch.cat(text_embeds, dim=0)
282
+ text_ids = torch.cat(text_ids, dim=0)
283
+ text_atts = torch.cat(text_atts, dim=0)
284
+ text_feats = torch.cat(text_feats, dim=0)
285
+
286
+ video_feats = []
287
+ video_embeds = []
288
+ for samples in data_loader:
289
+ video = samples["video"]
290
+
291
+ video = video.to(self.device)
292
+ video_feat = self.visual_encoder.forward_features(video)
293
+ video_embed = self.vision_proj(video_feat[:, 0, :])
294
+ video_embed = F.normalize(video_embed, dim=-1)
295
+
296
+ video_feats.append(video_feat.cpu())
297
+ video_embeds.append(video_embed)
298
+
299
+ video_feats = torch.cat(video_feats, dim=0)
300
+ video_embeds = torch.cat(video_embeds, dim=0)
301
+
302
+ sims_matrix = video_embeds @ text_embeds.t()
303
+ score_matrix_v2t = torch.full(
304
+ (len(data_loader.dataset.image), len(texts)), -100.0
305
+ ).to(self.device)
306
+
307
+ num_tasks = dist_utils.get_world_size()
308
+ rank = dist_utils.get_rank()
309
+ step = sims_matrix.size(0) // num_tasks + 1
310
+ start = rank * step
311
+ end = min(sims_matrix.size(0), start + step)
312
+
313
+ # video-to-text
314
+ for i, sims in enumerate(
315
+ metric_logger.log_every(sims_matrix[start:end], 50, header)
316
+ ):
317
+ topk_sim, topk_idx = sims.topk(k=k_test, dim=0)
318
+
319
+ video_feats_repeat = (
320
+ video_feats[start + i].repeat(k_test, 1, 1).to(self.device)
321
+ )
322
+ video_atts_repeat = torch.ones(
323
+ video_feats_repeat.size()[:-1], dtype=torch.long
324
+ ).to(self.device)
325
+
326
+ attention_mask = torch.cat([text_atts[topk_idx], video_atts_repeat], dim=1)
327
+ embedding_output = torch.cat(
328
+ [text_feats[topk_idx].to(self.device), video_feats_repeat], dim=1
329
+ )
330
+
331
+ output = self.text_encoder(
332
+ encoder_embeds=embedding_output,
333
+ attention_mask=attention_mask,
334
+ return_dict=True,
335
+ mode="fusion",
336
+ )
337
+
338
+ score = self.itm_head(output.last_hidden_state[:, 0, :])[:, 1]
339
+ score_matrix_v2t[start + i, topk_idx] = score + topk_sim
340
+
341
+ # text-to-video
342
+ sims_matrix = sims_matrix.t()
343
+ score_matrix_t2v = torch.full(
344
+ (len(texts), len(data_loader.dataset.image)), -100.0
345
+ ).to(self.device)
346
+
347
+ step = sims_matrix.size(0) // num_tasks + 1
348
+ start = rank * step
349
+ end = min(sims_matrix.size(0), start + step)
350
+
351
+ for i, sims in enumerate(
352
+ metric_logger.log_every(sims_matrix[start:end], 50, header)
353
+ ):
354
+
355
+ topk_sim, topk_idx = sims.topk(k=k_test, dim=0)
356
+
357
+ text_feats_repeat = (
358
+ text_feats[start + i].repeat(k_test, 1, 1).to(self.device)
359
+ )
360
+ text_atts_repeat = text_atts[start + i].repeat(k_test, 1).to(self.device)
361
+
362
+ video_atts = torch.ones(
363
+ video_feats[topk_idx].size()[:-1], dtype=torch.long
364
+ ).to(self.device)
365
+
366
+ embedding_output = torch.cat(
367
+ [text_feats_repeat, video_feats[topk_idx].to(self.device)], dim=1
368
+ )
369
+ attention_mask = torch.cat([text_atts_repeat, video_atts], dim=1)
370
+
371
+ output = self.text_encoder(
372
+ encoder_embeds=embedding_output,
373
+ attention_mask=attention_mask,
374
+ return_dict=True,
375
+ mode="fusion",
376
+ )
377
+
378
+ score = self.itm_head(output.last_hidden_state[:, 0, :])[:, 1]
379
+ score_matrix_t2v[start + i, topk_idx] = score + topk_sim
380
+
381
+ if dist_utils.is_dist_avail_and_initialized():
382
+ dist.barrier()
383
+ torch.distributed.all_reduce(
384
+ score_matrix_v2t, op=torch.distributed.ReduceOp.SUM
385
+ )
386
+ torch.distributed.all_reduce(
387
+ score_matrix_t2v, op=torch.distributed.ReduceOp.SUM
388
+ )
389
+
390
+ total_time = time.time() - start_time
391
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
392
+ logging.info("Evaluation time {}".format(total_time_str))
393
+
394
+ return score_matrix_v2t.cpu().numpy(), score_matrix_t2v.cpu().numpy()
395
+
396
+ @classmethod
397
+ def from_config(cls, cfg):
398
+ # vision encoder
399
+ visual_encoder_config = node_to_dict(cfg.timesformer)
400
+ visual_encoder = TimeSformer(**visual_encoder_config)
401
+
402
+ # text encoder
403
+ text_encoder = XBertEncoder.from_config(cfg)
404
+
405
+ max_txt_len = cfg.get("max_txt_len", 35)
406
+
407
+ model = cls(
408
+ visual_encoder=visual_encoder,
409
+ text_encoder=text_encoder,
410
+ max_txt_len=max_txt_len,
411
+ )
412
+
413
+ num_patches = (
414
+ visual_encoder_config["image_size"] // visual_encoder_config["patch_size"]
415
+ ) ** 2
416
+ num_frames = visual_encoder_config["n_frms"]
417
+
418
+ model.load_checkpoint_from_config(
419
+ cfg, num_frames=num_frames, num_patches=num_patches
420
+ )
421
+
422
+ return model
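A minimal loading sketch for the model registered above, assuming the standard LAVIS entry point `lavis.models.load_model` and the `"msrvtt"` config listed in `PRETRAINED_MODEL_CONFIG_DICT`:

```python
import torch
from lavis.models import load_model  # assumed LAVIS convenience loader

device = "cuda" if torch.cuda.is_available() else "cpu"
model = load_model("alpro_retrieval", model_type="msrvtt", is_eval=True, device=device)

# forward() expects samples["video"] of shape (b, t, c, h, w) and
# samples["text_input"] as a list of caption strings.
```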
LAVIS-main/lavis/models/beats/BEATs.py ADDED
@@ -0,0 +1,180 @@
1
+ # --------------------------------------------------------
2
+ # BEATs: Audio Pre-Training with Acoustic Tokenizers (https://arxiv.org/abs/2212.09058)
3
+ # Github source: https://github.com/microsoft/unilm/tree/master/beats
4
+ # Copyright (c) 2022 Microsoft
5
+ # Licensed under The MIT License [see LICENSE for details]
6
+ # Based on fairseq code bases
7
+ # https://github.com/pytorch/fairseq
8
+ # --------------------------------------------------------
9
+
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ from torch.nn import LayerNorm
14
+ import torchaudio.compliance.kaldi as ta_kaldi
15
+
16
+ from lavis.models.beats.backbone import (
17
+ TransformerEncoder,
18
+ )
19
+
20
+ import logging
21
+ from typing import Optional
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class BEATsConfig:
27
+ def __init__(self, cfg=None):
28
+ self.input_patch_size: int = -1 # patch size of patch embedding
29
+ self.embed_dim: int = 512 # patch embedding dimension
30
+ self.conv_bias: bool = False # include bias in conv encoder
31
+
32
+ self.encoder_layers: int = 12 # num encoder layers in the transformer
33
+ self.encoder_embed_dim: int = 768 # encoder embedding dimension
34
+ self.encoder_ffn_embed_dim: int = 3072 # encoder embedding dimension for FFN
35
+ self.encoder_attention_heads: int = 12 # num encoder attention heads
36
+ self.activation_fn: str = "gelu" # activation function to use
37
+
38
+ self.layer_wise_gradient_decay_ratio: float = 1.0 # ratio for layer-wise gradient decay
39
+ self.layer_norm_first: bool = False # apply layernorm first in the transformer
40
+ self.deep_norm: bool = False # apply deep_norm first in the transformer
41
+
42
+ # dropouts
43
+ self.dropout: float = 0.1 # dropout probability for the transformer
44
+ self.attention_dropout: float = 0.1 # dropout probability for attention weights
45
+ self.activation_dropout: float = 0.0 # dropout probability after activation in FFN
46
+ self.encoder_layerdrop: float = 0.0 # probability of dropping a transformer layer
47
+ self.dropout_input: float = 0.0 # dropout to apply to the input (after feat extr)
48
+
49
+ # positional embeddings
50
+ self.conv_pos: int = 128 # number of filters for convolutional positional embeddings
51
+ self.conv_pos_groups: int = 16 # number of groups for convolutional positional embedding
52
+
53
+ # relative position embedding
54
+ self.relative_position_embedding: bool = False # apply relative position embedding
55
+ self.num_buckets: int = 320 # number of buckets for relative position embedding
56
+ self.max_distance: int = 1280 # maximum distance for relative position embedding
57
+ self.gru_rel_pos: bool = False # apply gated relative position embedding
58
+
59
+ # label predictor
60
+ self.finetuned_model: bool = False # whether the model is a fine-tuned model.
61
+ self.predictor_dropout: float = 0.1 # dropout probability for the predictor
62
+ self.predictor_class: int = 527 # target class number for the predictor
63
+
64
+ if cfg is not None:
65
+ self.update(cfg)
66
+
67
+ def update(self, cfg: dict):
68
+ self.__dict__.update(cfg)
69
+
70
+
71
+ class BEATs(nn.Module):
72
+ def __init__(
73
+ self,
74
+ cfg: BEATsConfig,
75
+ ) -> None:
76
+ super().__init__()
77
+ logger.info(f"BEATs Config: {cfg.__dict__}")
78
+
79
+ self.cfg = cfg
80
+
81
+ self.embed = cfg.embed_dim
82
+ self.post_extract_proj = (
83
+ nn.Linear(self.embed, cfg.encoder_embed_dim)
84
+ if self.embed != cfg.encoder_embed_dim
85
+ else None
86
+ )
87
+
88
+ self.input_patch_size = cfg.input_patch_size
89
+ self.patch_embedding = nn.Conv2d(1, self.embed, kernel_size=self.input_patch_size, stride=self.input_patch_size,
90
+ bias=cfg.conv_bias)
91
+
92
+ self.dropout_input = nn.Dropout(cfg.dropout_input)
93
+
94
+ assert not cfg.deep_norm or not cfg.layer_norm_first
95
+ self.encoder = TransformerEncoder(cfg)
96
+ self.layer_norm = LayerNorm(self.embed)
97
+
98
+ if cfg.finetuned_model:
99
+ self.predictor_dropout = nn.Dropout(cfg.predictor_dropout)
100
+ self.predictor = nn.Linear(cfg.encoder_embed_dim, cfg.predictor_class)
101
+ else:
102
+ self.predictor = None
103
+
104
+ def forward_padding_mask(
105
+ self,
106
+ features: torch.Tensor,
107
+ padding_mask: torch.Tensor,
108
+ ) -> torch.Tensor:
109
+ extra = padding_mask.size(1) % features.size(1)
110
+ if extra > 0:
111
+ padding_mask = padding_mask[:, :-extra]
112
+ padding_mask = padding_mask.view(
113
+ padding_mask.size(0), features.size(1), -1
114
+ )
115
+ padding_mask = padding_mask.all(-1)
116
+ return padding_mask
117
+
118
+ def preprocess(
119
+ self,
120
+ source: torch.Tensor,
121
+ fbank_mean: float = 15.41663,
122
+ fbank_std: float = 6.55582,
123
+ ) -> torch.Tensor:
124
+ fbanks = []
125
+ for waveform in source:
126
+ waveform = waveform.unsqueeze(0) * 2 ** 15
127
+ fbank = ta_kaldi.fbank(waveform, num_mel_bins=128, sample_frequency=16000, frame_length=25, frame_shift=10)
128
+ fbanks.append(fbank)
129
+ fbank = torch.stack(fbanks, dim=0)
130
+ fbank = (fbank - fbank_mean) / (2 * fbank_std)
131
+ return fbank
132
+
133
+ def extract_features(
134
+ self,
135
+ fbank: torch.Tensor,
136
+ padding_mask: Optional[torch.Tensor] = None,
137
+ fbank_mean: float = 15.41663,
138
+ fbank_std: float = 6.55582,
139
+ ):
140
+ ## NOTE: preprocessing is done separately in lavis.processors.audio_processors.BeatsAudioProcessor
141
+ # fbank = self.preprocess(source, fbank_mean=fbank_mean, fbank_std=fbank_std)
142
+
143
+ if padding_mask is not None:
144
+ padding_mask = self.forward_padding_mask(fbank, padding_mask)
145
+
146
+ fbank = fbank.unsqueeze(1)
147
+ features = self.patch_embedding(fbank)
148
+ features = features.reshape(features.shape[0], features.shape[1], -1)
149
+ features = features.transpose(1, 2)
150
+ features = self.layer_norm(features)
151
+
152
+ if padding_mask is not None:
153
+ padding_mask = self.forward_padding_mask(features, padding_mask)
154
+
155
+ if self.post_extract_proj is not None:
156
+ features = self.post_extract_proj(features)
157
+
158
+ x = self.dropout_input(features)
159
+
160
+ x, layer_results = self.encoder(
161
+ x,
162
+ padding_mask=padding_mask,
163
+ )
164
+
165
+ if self.predictor is not None:
166
+ x = self.predictor_dropout(x)
167
+ logits = self.predictor(x)
168
+
169
+ if padding_mask is not None and padding_mask.any():
170
+ logits[padding_mask] = 0
171
+ logits = logits.sum(dim=1)
172
+ logits = logits / (~padding_mask).sum(dim=1).unsqueeze(-1).expand_as(logits)
173
+ else:
174
+ logits = logits.mean(dim=1)
175
+
176
+ lprobs = torch.sigmoid(logits)
177
+
178
+ return lprobs, padding_mask
179
+ else:
180
+ return x, padding_mask
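To make the tensor flow above concrete, here is a shape sketch of the fbank-to-patch-token pipeline in `extract_features` (the batch size, clip length, and `input_patch_size=16` are illustrative assumptions):

```python
import torch
import torch.nn as nn

patch, embed_dim = 16, 512                       # cfg.input_patch_size, cfg.embed_dim
fbank = torch.randn(2, 998, 128)                 # (B, frames, mel bins): ~10 s at a 10 ms shift
patch_embedding = nn.Conv2d(1, embed_dim, kernel_size=patch, stride=patch)

x = patch_embedding(fbank.unsqueeze(1))          # (2, 512, 62, 8): 62 time patches x 8 freq patches
x = x.reshape(x.shape[0], x.shape[1], -1).transpose(1, 2)
print(x.shape)                                   # torch.Size([2, 496, 512]): tokens fed to the encoder
```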
LAVIS-main/lavis/models/beats/LICENSE_BEATs.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) Microsoft Corporation
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
LAVIS-main/lavis/models/beats/README.md ADDED
@@ -0,0 +1,127 @@
1
+
2
+ # BEATs
3
+
4
+ [**BEATs**](https://arxiv.org/abs/2212.09058): **Audio Pre-Training with Acoustic Tokenizers**
5
+
6
+ Official PyTorch implementation and pretrained models of BEATs
7
+
8
+ ## Pre-Trained and Fine-Tuned Tokenizers and Models
9
+ Iterations | Tokenizer | Pre-Trained Model | AudioSet Fine-Tuned Model 1 | AudioSet Fine-Tuned Model 2
10
+ |---|---|---|---|---
11
+ Iter1 | Random Projection | [BEATs_iter1](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter1.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter1 (cpt1)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter1_finetuned_on_AS2M_cpt1.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter1 (cpt2)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter1_finetuned_on_AS2M_cpt2.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) |
12
+ Iter2 | [Tokenizer_iter2](https://valle.blob.core.windows.net/share/BEATs/Tokenizer_iter2.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D)| [BEATs_iter2](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter2.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter2 (cpt1)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter2_finetuned_on_AS2M_cpt1.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter2 (cpt2)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter2_finetuned_on_AS2M_cpt2.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) |
13
+ Iter3 | [Tokenizer_iter3](https://valle.blob.core.windows.net/share/BEATs/Tokenizer_iter3.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D)| [BEATs_iter3](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter3 (cpt1)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_finetuned_on_AS2M_cpt1.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter3 (cpt2)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_finetuned_on_AS2M_cpt2.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) |
14
+ Iter3+ | [Tokenizer_iter3+ (AS20K)](https://valle.blob.core.windows.net/share/BEATs/Tokenizer_iter3_plus_AS20K.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D)| [BEATs_iter3+ (AS20K)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_plus_AS20K.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter3+ (AS20K) (cpt1)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_plus_AS20K_finetuned_on_AS2M_cpt1.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter3+ (AS20K) (cpt2)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_plus_AS20K_finetuned_on_AS2M_cpt2.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) |
15
+ Iter3+ | [Tokenizer_iter3+ (AS2M)](https://valle.blob.core.windows.net/share/BEATs/Tokenizer_iter3_plus_AS2M.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D)| [BEATs_iter3+ (AS2M)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_plus_AS2M.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter3+ (AS2M) (cpt1)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt1.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) | [Fine-tuned BEATs_iter3+ (AS2M) (cpt2)](https://valle.blob.core.windows.net/share/BEATs/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt?sv=2020-08-04&st=2023-03-01T07%3A51%3A05Z&se=2033-03-02T07%3A51%3A00Z&sr=c&sp=rl&sig=QJXmSJG9DbMKf48UDIU1MfzIro8HQOf3sqlNXiflY1I%3D) |
16
+
17
+
18
+ ### Load Tokenizers
19
+
20
+ ```python
21
+ import torch
22
+ from Tokenizers import TokenizersConfig, Tokenizers
23
+
24
+ # load the pre-trained checkpoints
25
+ checkpoint = torch.load('/path/to/tokenizer.pt')
26
+
27
+ cfg = TokenizersConfig(checkpoint['cfg'])
28
+ BEATs_tokenizer = Tokenizers(cfg)
29
+ BEATs_tokenizer.load_state_dict(checkpoint['model'])
30
+ BEATs_tokenizer.eval()
31
+
32
+ # tokenize the audio and generate the labels
33
+ audio_input_16khz = torch.randn(1, 10000)
34
+ padding_mask = torch.zeros(1, 10000).bool()
35
+
36
+ labels = BEATs_tokenizer.extract_labels(audio_input_16khz, padding_mask=padding_mask)
37
+ ```
38
+
39
+
40
+ ### Load Pre-Trained Models
41
+
42
+ ```python
43
+ import torch
44
+ from BEATs import BEATs, BEATsConfig
45
+
46
+ # load the pre-trained checkpoints
47
+ checkpoint = torch.load('/path/to/model.pt')
48
+
49
+ cfg = BEATsConfig(checkpoint['cfg'])
50
+ BEATs_model = BEATs(cfg)
51
+ BEATs_model.load_state_dict(checkpoint['model'])
52
+ BEATs_model.eval()
53
+
54
+ # extract the audio representation
55
+ audio_input_16khz = torch.randn(1, 10000)
56
+ padding_mask = torch.zeros(1, 10000).bool()
57
+
58
+ representation = BEATs_model.extract_features(audio_input_16khz, padding_mask=padding_mask)[0]
59
+ ```
60
+
61
+
62
+ ### Load Fine-tuned Models
63
+
64
+ ```python
65
+ import torch
66
+ from BEATs import BEATs, BEATsConfig
67
+
68
+ # load the fine-tuned checkpoints
69
+ checkpoint = torch.load('/path/to/model.pt')
70
+
71
+ cfg = BEATsConfig(checkpoint['cfg'])
72
+ BEATs_model = BEATs(cfg)
73
+ BEATs_model.load_state_dict(checkpoint['model'])
74
+ BEATs_model.eval()
75
+
76
+ # predict the classification probability of each class
77
+ audio_input_16khz = torch.randn(3, 10000)
78
+ padding_mask = torch.zeros(3, 10000).bool()
79
+
80
+ probs = BEATs_model.extract_features(audio_input_16khz, padding_mask=padding_mask)[0]
81
+
82
+ for i, (top5_label_prob, top5_label_idx) in enumerate(zip(*probs.topk(k=5))):
83
+ top5_label = [checkpoint['label_dict'][label_idx.item()] for label_idx in top5_label_idx]
84
+ print(f'Top 5 predicted labels of the {i}th audio are {top5_label} with probability of {top5_label_prob}')
85
+ ```
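Note that in the LAVIS copy of `BEATs.py` above, `extract_features` takes precomputed filterbank features rather than raw audio (preprocessing is delegated to `lavis.processors.audio_processors.BeatsAudioProcessor`). To reuse the upstream snippets with that copy, run preprocessing explicitly first; a hedged adaptation:

```python
# Adaptation for the LAVIS variant, whose extract_features expects fbanks.
fbank = BEATs_model.preprocess(audio_input_16khz)                 # (B, frames, 128) normalized fbanks
outputs = BEATs_model.extract_features(fbank, padding_mask=padding_mask)[0]
```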
86
+
87
+ ## Evaluation Results
88
+
89
+ ### Comparing with the SOTA Single Models
90
+ ![alt text](Evaluation_Results/Comparing_with_the_SOTA_Single_Models.png)
91
+
92
+
93
+ ### Comparing with the SOTA Ensemble Models
94
+ ![alt text](Evaluation_Results/Comparing_with_the_SOTA_Ensemble_Models.png)
95
+
96
+
97
+ ### Comparing Different BEATs Tokenizers
98
+ ![alt text](Evaluation_Results/Comparing_Different_BEATS_Tokenizers.png)
99
+
100
+
101
+ ### Comparing Different Pre-Training Targets
102
+ ![alt text](Evaluation_Results/Comparing_Different_Pre-Training_Targets.png)
103
+
104
+
105
+ ## License
106
+ This project is licensed under the license found in the LICENSE file in the root directory of this source tree.
107
+ Portions of the source code are based on the [FAIRSEQ](https://github.com/pytorch/fairseq) and [VQGAN](https://github.com/CompVis/taming-transformers) project.
108
+
109
+ [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct)
110
+
111
+
112
+ ### Reference
113
+ If you find our work useful in your research, please cite the following paper:
114
+ ``` latex
115
+ @article{Chen2022beats,
116
+ title = {BEATs: Audio Pre-Training with Acoustic Tokenizers},
117
+ author = {Sanyuan Chen and Yu Wu and Chengyi Wang and Shujie Liu and Daniel Tompkins and Zhuo Chen and Furu Wei},
118
+ eprint={2212.09058},
119
+ archivePrefix={arXiv},
120
+ year={2022}
121
+ }
122
+ ```
123
+ ### Contact Information
124
+
125
+ For help or issues using BEATs models, please submit a GitHub issue.
126
+
127
+ For other communications related to BEATs, please contact Yu Wu (`yuwu1@microsoft.com`).
LAVIS-main/lavis/models/beats/Tokenizers.py ADDED
@@ -0,0 +1,173 @@
1
+ # --------------------------------------------------------
2
+ # BEATs: Audio Pre-Training with Acoustic Tokenizers (https://arxiv.org/abs/2212.09058)
3
+ # Github source: https://github.com/microsoft/unilm/tree/master/beats
4
+ # Copyright (c) 2022 Microsoft
5
+ # Licensed under The MIT License [see LICENSE for details]
6
+ # Based on fairseq code bases
7
+ # https://github.com/pytorch/fairseq
8
+ # --------------------------------------------------------
9
+
10
+
11
+ import torch
12
+ import torch.nn as nn
13
+ from torch.nn import LayerNorm
14
+ import torchaudio.compliance.kaldi as ta_kaldi
15
+
16
+ from lavis.models.beats.backbone import (
17
+ TransformerEncoder,
18
+ )
19
+ from lavis.models.beats.quantizer import (
20
+ NormEMAVectorQuantizer,
21
+ )
22
+
23
+ import logging
24
+ from typing import Optional
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ class TokenizersConfig:
30
+ def __init__(self, cfg=None):
31
+ self.input_patch_size: int = -1 # patch size of patch embedding
32
+ self.embed_dim: int = 512 # patch embedding dimension
33
+ self.conv_bias: bool = False # include bias in conv encoder
34
+
35
+ self.encoder_layers: int = 12 # num encoder layers in the transformer
36
+ self.encoder_embed_dim: int = 768 # encoder embedding dimension
37
+ self.encoder_ffn_embed_dim: int = 3072 # encoder embedding dimension for FFN
38
+ self.encoder_attention_heads: int = 12 # num encoder attention heads
39
+ self.activation_fn: str = "gelu" # activation function to use
40
+
41
+ self.layer_norm_first: bool = False # apply layernorm first in the transformer
42
+ self.deep_norm: bool = False # apply deep_norm first in the transformer
43
+
44
+ # dropouts
45
+ self.dropout: float = 0.1 # dropout probability for the transformer
46
+ self.attention_dropout: float = 0.1 # dropout probability for attention weights
47
+ self.activation_dropout: float = 0.0 # dropout probability after activation in FFN
48
+ self.encoder_layerdrop: float = 0.0 # probability of dropping a transformer layer
49
+ self.dropout_input: float = 0.0 # dropout to apply to the input (after feat extr)
50
+
51
+ # positional embeddings
52
+ self.conv_pos: int = 128 # number of filters for convolutional positional embeddings
53
+ self.conv_pos_groups: int = 16 # number of groups for convolutional positional embedding
54
+
55
+ # relative position embedding
56
+ self.relative_position_embedding: bool = False # apply relative position embedding
57
+ self.num_buckets: int = 320 # number of buckets for relative position embedding
58
+ self.max_distance: int = 1280 # maximum distance for relative position embedding
59
+ self.gru_rel_pos: bool = False # apply gated relative position embedding
60
+
61
+ # quantizer
62
+ self.quant_n: int = 1024 # codebook number in quantizer
63
+ self.quant_dim: int = 256 # codebook dimension in quantizer
64
+
65
+ if cfg is not None:
66
+ self.update(cfg)
67
+
68
+ def update(self, cfg: dict):
69
+ self.__dict__.update(cfg)
70
+
71
+
72
+ class Tokenizers(nn.Module):
73
+ def __init__(
74
+ self,
75
+ cfg: TokenizersConfig,
76
+ ) -> None:
77
+ super().__init__()
78
+ logger.info(f"Tokenizers Config: {cfg.__dict__}")
79
+
80
+ self.cfg = cfg
81
+
82
+ self.embed = cfg.embed_dim
83
+ self.post_extract_proj = (
84
+ nn.Linear(self.embed, cfg.encoder_embed_dim)
85
+ if self.embed != cfg.encoder_embed_dim
86
+ else None
87
+ )
88
+
89
+ self.input_patch_size = cfg.input_patch_size
90
+ self.patch_embedding = nn.Conv2d(1, self.embed, kernel_size=self.input_patch_size, stride=self.input_patch_size,
91
+ bias=cfg.conv_bias)
92
+
93
+ self.dropout_input = nn.Dropout(cfg.dropout_input)
94
+
95
+ assert not cfg.deep_norm or not cfg.layer_norm_first
96
+ self.encoder = TransformerEncoder(cfg)
97
+ self.layer_norm = LayerNorm(self.embed)
98
+
99
+ self.quantize = NormEMAVectorQuantizer(
100
+ n_embed=cfg.quant_n, embedding_dim=cfg.quant_dim, beta=1.0, kmeans_init=True, decay=0.99,
101
+ )
102
+ self.quant_n = cfg.quant_n
103
+ self.quantize_layer = nn.Sequential(
104
+ nn.Linear(cfg.encoder_embed_dim, cfg.encoder_embed_dim),
105
+ nn.Tanh(),
106
+ nn.Linear(cfg.encoder_embed_dim, cfg.quant_dim) # for quantize
107
+ )
108
+
109
+ def forward_padding_mask(
110
+ self,
111
+ features: torch.Tensor,
112
+ padding_mask: torch.Tensor,
113
+ ) -> torch.Tensor:
114
+ extra = padding_mask.size(1) % features.size(1)
115
+ if extra > 0:
116
+ padding_mask = padding_mask[:, :-extra]
117
+ padding_mask = padding_mask.view(
118
+ padding_mask.size(0), features.size(1), -1
119
+ )
120
+ padding_mask = padding_mask.all(-1)
121
+ return padding_mask
122
+
123
+ def preprocess(
124
+ self,
125
+ source: torch.Tensor,
126
+ fbank_mean: float = 15.41663,
127
+ fbank_std: float = 6.55582,
128
+ ) -> torch.Tensor:
129
+ fbanks = []
130
+ for waveform in source:
131
+ waveform = waveform.unsqueeze(0) * 2 ** 15
132
+ fbank = ta_kaldi.fbank(waveform, num_mel_bins=128, sample_frequency=16000, frame_length=25, frame_shift=10)
133
+ fbanks.append(fbank)
134
+ fbank = torch.stack(fbanks, dim=0)
135
+ fbank = (fbank - fbank_mean) / (2 * fbank_std)
136
+ return fbank
137
+
138
+ def extract_labels(
139
+ self,
140
+ source: torch.Tensor,
141
+ padding_mask: Optional[torch.Tensor] = None,
142
+ fbank_mean: float = 15.41663,
143
+ fbank_std: float = 6.55582,
144
+ ):
145
+ fbank = self.preprocess(source, fbank_mean=fbank_mean, fbank_std=fbank_std)
146
+
147
+ if padding_mask is not None:
148
+ padding_mask = self.forward_padding_mask(fbank, padding_mask)
149
+
150
+ fbank = fbank.unsqueeze(1)
151
+ features = self.patch_embedding(fbank)
152
+ features = features.reshape(features.shape[0], features.shape[1], -1)
153
+ features = features.transpose(1, 2)
154
+ features = self.layer_norm(features)
155
+
156
+ if padding_mask is not None:
157
+ padding_mask = self.forward_padding_mask(features, padding_mask)
158
+
159
+ if self.post_extract_proj is not None:
160
+ features = self.post_extract_proj(features)
161
+
162
+ x = self.dropout_input(features)
163
+
164
+ x, layer_results = self.encoder(
165
+ x,
166
+ padding_mask=padding_mask,
167
+ )
168
+
169
+ quantize_input = self.quantize_layer(x)
170
+ quantize_feature, embed_loss, embed_ind = self.quantize(quantize_input)
171
+
172
+ return embed_ind
173
+
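The tokenizer above quantizes the encoder outputs against a `quant_n=1024` EMA codebook, so `extract_labels` returns one discrete code id per patch token. A shape sketch, assuming a checkpoint has been loaded as in the README (the exact output shape depends on `NormEMAVectorQuantizer`, which flattens the token dimension):

```python
import torch

audio = torch.randn(1, 160000)                   # 10 s of 16 kHz audio
labels = BEATs_tokenizer.extract_labels(audio)   # code ids in [0, 1024), one per patch token
print(labels.shape, labels.dtype)                # e.g. torch.Size([496]) torch.int64
```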
LAVIS-main/lavis/models/beats/backbone.py ADDED
@@ -0,0 +1,783 @@
1
+ # --------------------------------------------------------
2
+ # BEATs: Audio Pre-Training with Acoustic Tokenizers (https://arxiv.org/abs/2212.09058)
3
+ # Github source: https://github.com/microsoft/unilm/tree/master/beats
4
+ # Copyright (c) 2022 Microsoft
5
+ # Licensed under The MIT License [see LICENSE for details]
6
+ # Based on fairseq code bases
7
+ # https://github.com/pytorch/fairseq
8
+ # --------------------------------------------------------
9
+
10
+ import math
11
+ import numpy as np
12
+ from typing import Dict, Optional, Tuple
13
+ import torch
14
+ from torch import Tensor, nn
15
+ import torch.nn.functional as F
16
+ from torch.nn import LayerNorm, Parameter
17
+ from lavis.models.beats.modules import (
18
+ GradMultiply,
19
+ SamePad,
20
+ get_activation_fn,
21
+ GLU_Linear,
22
+ quant_noise,
23
+ )
24
+
25
+
26
+ class TransformerEncoder(nn.Module):
27
+ def __init__(self, args):
28
+ super().__init__()
29
+
30
+ self.dropout = args.dropout
31
+ self.embedding_dim = args.encoder_embed_dim
32
+
33
+ self.pos_conv = nn.Conv1d(
34
+ self.embedding_dim,
35
+ self.embedding_dim,
36
+ kernel_size=args.conv_pos,
37
+ padding=args.conv_pos // 2,
38
+ groups=args.conv_pos_groups,
39
+ )
40
+ dropout = 0
41
+ std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim))
42
+ nn.init.normal_(self.pos_conv.weight, mean=0, std=std)
43
+ nn.init.constant_(self.pos_conv.bias, 0)
44
+
45
+ self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2)
46
+ self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU())
47
+
48
+ if hasattr(args, "relative_position_embedding"):
49
+ self.relative_position_embedding = args.relative_position_embedding
50
+ self.num_buckets = args.num_buckets
51
+ self.max_distance = args.max_distance
52
+ else:
53
+ self.relative_position_embedding = False
54
+ self.num_buckets = 0
55
+ self.max_distance = 0
56
+
57
+ self.layers = nn.ModuleList(
58
+ [
59
+ TransformerSentenceEncoderLayer(
60
+ embedding_dim=self.embedding_dim,
61
+ ffn_embedding_dim=args.encoder_ffn_embed_dim,
62
+ num_attention_heads=args.encoder_attention_heads,
63
+ dropout=self.dropout,
64
+ attention_dropout=args.attention_dropout,
65
+ activation_dropout=args.activation_dropout,
66
+ activation_fn=args.activation_fn,
67
+ layer_norm_first=args.layer_norm_first,
68
+ deep_norm=args.deep_norm,
69
+ has_relative_attention_bias=self.relative_position_embedding,
70
+ num_buckets=self.num_buckets,
71
+ max_distance=self.max_distance,
72
+ gru_rel_pos=args.gru_rel_pos,
73
+ encoder_layers=args.encoder_layers,
74
+ )
75
+ for i in range(args.encoder_layers)
76
+ ]
77
+ )
78
+ if self.relative_position_embedding:
79
+ for i in range(1, args.encoder_layers):
80
+ del self.layers[i].self_attn.relative_attention_bias
81
+ self.layers[i].self_attn.relative_attention_bias = self.layers[0].self_attn.relative_attention_bias
82
+
83
+ self.layer_norm_first = args.layer_norm_first
84
+ self.layer_norm = LayerNorm(self.embedding_dim)
85
+ self.layerdrop = args.encoder_layerdrop
86
+
87
+ self.apply(init_bert_params)
88
+
89
+ if args.deep_norm:
90
+ deep_norm_beta = math.pow(8 * args.encoder_layers, -1 / 4)
91
+ for i in range(args.encoder_layers):
92
+ nn.init.xavier_normal_(self.layers[i].self_attn.k_proj.weight, gain=1)
93
+ nn.init.xavier_normal_(self.layers[i].self_attn.v_proj.weight, gain=deep_norm_beta)
94
+ nn.init.xavier_normal_(self.layers[i].self_attn.q_proj.weight, gain=1)
95
+ nn.init.xavier_normal_(self.layers[i].self_attn.out_proj.weight, gain=deep_norm_beta)
96
+ nn.init.xavier_normal_(self.layers[i].fc1.weight, gain=deep_norm_beta)
97
+ nn.init.xavier_normal_(self.layers[i].fc2.weight, gain=deep_norm_beta)
98
+
99
+ self.layer_wise_gradient_decay_ratio = getattr(args, "layer_wise_gradient_decay_ratio", 1)
100
+
101
+ def forward(self, x, padding_mask=None, layer=None):
102
+ x, layer_results = self.extract_features(x, padding_mask, layer)
103
+
104
+ if self.layer_norm_first and layer is None:
105
+ x = self.layer_norm(x)
106
+
107
+ return x, layer_results
108
+
109
+ def extract_features(self, x, padding_mask=None, tgt_layer=None):
110
+
111
+ if padding_mask is not None:
112
+ x[padding_mask] = 0
113
+
114
+ x_conv = self.pos_conv(x.transpose(1, 2))
115
+ x_conv = x_conv.transpose(1, 2)
116
+ x = x + x_conv
117
+
118
+ if not self.layer_norm_first:
119
+ x = self.layer_norm(x)
120
+
121
+ x = F.dropout(x, p=self.dropout, training=self.training)
122
+
123
+ # B x T x C -> T x B x C
124
+ x = x.transpose(0, 1)
125
+
126
+ layer_results = []
127
+ z = None
128
+ if tgt_layer is not None:
129
+ layer_results.append((x, z))
130
+ r = None
131
+ pos_bias = None
132
+ for i, layer in enumerate(self.layers):
133
+ if self.layer_wise_gradient_decay_ratio != 1.0:
134
+ x = GradMultiply.apply(x, self.layer_wise_gradient_decay_ratio)
135
+ dropout_probability = np.random.random()
136
+ if not self.training or (dropout_probability > self.layerdrop):
137
+ x, z, pos_bias = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_bias)
138
+ if tgt_layer is not None:
139
+ layer_results.append((x, z))
140
+ if i == tgt_layer:
141
+ r = x
142
+ break
143
+
144
+ if r is not None:
145
+ x = r
146
+
147
+ # T x B x C -> B x T x C
148
+ x = x.transpose(0, 1)
149
+
150
+ return x, layer_results
151
+
152
+
153
+ class TransformerSentenceEncoderLayer(nn.Module):
154
+ def __init__(
155
+ self,
156
+ embedding_dim: float = 768,
157
+ ffn_embedding_dim: float = 3072,
158
+ num_attention_heads: float = 8,
159
+ dropout: float = 0.1,
160
+ attention_dropout: float = 0.1,
161
+ activation_dropout: float = 0.1,
162
+ activation_fn: str = "relu",
163
+ layer_norm_first: bool = False,
164
+ deep_norm: bool = False,
165
+ has_relative_attention_bias: bool = False,
166
+ num_buckets: int = 0,
167
+ max_distance: int = 0,
168
+ rescale_init: bool = False,
169
+ gru_rel_pos: bool = False,
170
+ encoder_layers: int = 0,
171
+ ) -> None:
172
+
173
+ super().__init__()
174
+ self.embedding_dim = embedding_dim
175
+ self.dropout = dropout
176
+ self.activation_dropout = activation_dropout
177
+
178
+ self.activation_name = activation_fn
179
+ self.activation_fn = get_activation_fn(activation_fn)
180
+ self.self_attn = MultiheadAttention(
181
+ self.embedding_dim,
182
+ num_attention_heads,
183
+ dropout=attention_dropout,
184
+ self_attention=True,
185
+ has_relative_attention_bias=has_relative_attention_bias,
186
+ num_buckets=num_buckets,
187
+ max_distance=max_distance,
188
+ rescale_init=rescale_init,
189
+ gru_rel_pos=gru_rel_pos,
190
+ )
191
+
192
+ self.dropout1 = nn.Dropout(dropout)
193
+ self.dropout2 = nn.Dropout(self.activation_dropout)
194
+ self.dropout3 = nn.Dropout(dropout)
195
+
196
+ self.layer_norm_first = layer_norm_first
197
+
198
+ self.self_attn_layer_norm = LayerNorm(self.embedding_dim)
199
+
200
+ if self.activation_name == "glu":
201
+ self.fc1 = GLU_Linear(self.embedding_dim, ffn_embedding_dim, "swish")
202
+ else:
203
+ self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim)
204
+ self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim)
205
+
206
+ self.final_layer_norm = LayerNorm(self.embedding_dim)
207
+
208
+ self.deep_norm = deep_norm
209
+ if self.deep_norm:
210
+ self.deep_norm_alpha = math.pow(2 * encoder_layers, 1 / 4)
211
+ else:
212
+ self.deep_norm_alpha = 1
213
+
214
+ def forward(
215
+ self,
216
+ x: torch.Tensor,
217
+ self_attn_mask: torch.Tensor = None,
218
+ self_attn_padding_mask: torch.Tensor = None,
219
+ need_weights: bool = False,
220
+ pos_bias=None
221
+ ):
222
+ residual = x
223
+
224
+ if self.layer_norm_first:
225
+ x = self.self_attn_layer_norm(x)
226
+ x, attn, pos_bias = self.self_attn(
227
+ query=x,
228
+ key=x,
229
+ value=x,
230
+ key_padding_mask=self_attn_padding_mask,
231
+ need_weights=False,
232
+ attn_mask=self_attn_mask,
233
+ position_bias=pos_bias
234
+ )
235
+ x = self.dropout1(x)
236
+ x = residual + x
237
+
238
+ residual = x
239
+ x = self.final_layer_norm(x)
240
+ if self.activation_name == "glu":
241
+ x = self.fc1(x)
242
+ else:
243
+ x = self.activation_fn(self.fc1(x))
244
+ x = self.dropout2(x)
245
+ x = self.fc2(x)
246
+ x = self.dropout3(x)
247
+ x = residual + x
248
+ else:
249
+ x, attn, pos_bias = self.self_attn(
250
+ query=x,
251
+ key=x,
252
+ value=x,
253
+ key_padding_mask=self_attn_padding_mask,
254
+ need_weights=need_weights,
255
+ attn_mask=self_attn_mask,
256
+ position_bias=pos_bias
257
+ )
258
+
259
+ x = self.dropout1(x)
260
+ x = residual * self.deep_norm_alpha + x
261
+
262
+ x = self.self_attn_layer_norm(x)
263
+
264
+ residual = x
265
+ if self.activation_name == "glu":
266
+ x = self.fc1(x)
267
+ else:
268
+ x = self.activation_fn(self.fc1(x))
269
+ x = self.dropout2(x)
270
+ x = self.fc2(x)
271
+ x = self.dropout3(x)
272
+ x = residual * self.deep_norm_alpha + x
273
+ x = self.final_layer_norm(x)
274
+
275
+ return x, attn, pos_bias
276
+
277
+
278
+ class MultiheadAttention(nn.Module):
279
+ """Multi-headed attention.
280
+
281
+ See "Attention Is All You Need" for more details.
282
+ """
283
+
284
+ def __init__(
285
+ self,
286
+ embed_dim,
287
+ num_heads,
288
+ kdim=None,
289
+ vdim=None,
290
+ dropout=0.0,
291
+ bias=True,
292
+ add_bias_kv=False,
293
+ add_zero_attn=False,
294
+ self_attention=False,
295
+ encoder_decoder_attention=False,
296
+ q_noise=0.0,
297
+ qn_block_size=8,
298
+ has_relative_attention_bias=False,
299
+ num_buckets=32,
300
+ max_distance=128,
301
+ gru_rel_pos=False,
302
+ rescale_init=False,
303
+ ):
304
+ super().__init__()
305
+ self.embed_dim = embed_dim
306
+ self.kdim = kdim if kdim is not None else embed_dim
307
+ self.vdim = vdim if vdim is not None else embed_dim
308
+ self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim
309
+
310
+ self.num_heads = num_heads
311
+ self.dropout_module = nn.Dropout(dropout)
312
+
313
+ self.has_relative_attention_bias = has_relative_attention_bias
314
+ self.num_buckets = num_buckets
315
+ self.max_distance = max_distance
316
+ if self.has_relative_attention_bias:
317
+ self.relative_attention_bias = nn.Embedding(num_buckets, num_heads)
318
+
319
+ self.head_dim = embed_dim // num_heads
320
+ self.q_head_dim = self.head_dim
321
+ self.k_head_dim = self.head_dim
322
+ assert (
323
+ self.head_dim * num_heads == self.embed_dim
324
+ ), "embed_dim must be divisible by num_heads"
325
+ self.scaling = self.head_dim ** -0.5
326
+
327
+ self.self_attention = self_attention
328
+ self.encoder_decoder_attention = encoder_decoder_attention
329
+
330
+ assert not self.self_attention or self.qkv_same_dim, (
331
+ "Self-attention requires query, key and " "value to be of the same size"
332
+ )
333
+
334
+ k_bias = True
335
+ if rescale_init:
336
+ k_bias = False
337
+
338
+ k_embed_dim = embed_dim
339
+ q_embed_dim = embed_dim
340
+
341
+ self.k_proj = quant_noise(
342
+ nn.Linear(self.kdim, k_embed_dim, bias=k_bias), q_noise, qn_block_size
343
+ )
344
+ self.v_proj = quant_noise(
345
+ nn.Linear(self.vdim, embed_dim, bias=bias), q_noise, qn_block_size
346
+ )
347
+ self.q_proj = quant_noise(
348
+ nn.Linear(embed_dim, q_embed_dim, bias=bias), q_noise, qn_block_size
349
+ )
350
+
351
+ self.out_proj = quant_noise(
352
+ nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size
353
+ )
354
+
355
+ if add_bias_kv:
356
+ self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim))
357
+ self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim))
358
+ else:
359
+ self.bias_k = self.bias_v = None
360
+
361
+ self.add_zero_attn = add_zero_attn
362
+
363
+ self.gru_rel_pos = gru_rel_pos
364
+ if self.gru_rel_pos:
365
+ self.grep_linear = nn.Linear(self.q_head_dim, 8)
366
+ self.grep_a = nn.Parameter(torch.ones(1, num_heads, 1, 1))
367
+
368
+ self.reset_parameters()
369
+
370
+ def reset_parameters(self):
371
+ if self.qkv_same_dim:
372
+ # Empirically observed the convergence to be much better with
373
+ # the scaled initialization
374
+ nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2))
375
+ nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2))
376
+ nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2))
377
+ else:
378
+ nn.init.xavier_uniform_(self.k_proj.weight)
379
+ nn.init.xavier_uniform_(self.v_proj.weight)
380
+ nn.init.xavier_uniform_(self.q_proj.weight)
381
+
382
+ nn.init.xavier_uniform_(self.out_proj.weight)
383
+ if self.out_proj.bias is not None:
384
+ nn.init.constant_(self.out_proj.bias, 0.0)
385
+ if self.bias_k is not None:
386
+ nn.init.xavier_normal_(self.bias_k)
387
+ if self.bias_v is not None:
388
+ nn.init.xavier_normal_(self.bias_v)
389
+ if self.has_relative_attention_bias:
390
+ nn.init.xavier_normal_(self.relative_attention_bias.weight)
391
+
392
+ def _relative_positions_bucket(self, relative_positions, bidirectional=True):
393
+ num_buckets = self.num_buckets
394
+ max_distance = self.max_distance
395
+ relative_buckets = 0
396
+
397
+ if bidirectional:
398
+ num_buckets = num_buckets // 2
399
+ relative_buckets += (relative_positions > 0).to(torch.long) * num_buckets
400
+ relative_positions = torch.abs(relative_positions)
401
+ else:
402
+ relative_positions = -torch.min(relative_positions, torch.zeros_like(relative_positions))
403
+
404
+ max_exact = num_buckets // 2
405
+ is_small = relative_positions < max_exact
406
+
407
+ relative_position_if_large = max_exact + (
408
+ torch.log(relative_positions.float() / max_exact)
409
+ / math.log(max_distance / max_exact)
410
+ * (num_buckets - max_exact)
411
+ ).to(torch.long)
412
+ relative_position_if_large = torch.min(
413
+ relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)
414
+ )
415
+
416
+ relative_buckets += torch.where(is_small, relative_positions, relative_position_if_large)
417
+ return relative_buckets
418
+
419
+ def compute_bias(self, query_length, key_length):
420
+ context_position = torch.arange(query_length, dtype=torch.long)[:, None]
421
+ memory_position = torch.arange(key_length, dtype=torch.long)[None, :]
422
+ relative_position = memory_position - context_position
423
+ relative_position_bucket = self._relative_positions_bucket(
424
+ relative_position,
425
+ bidirectional=True
426
+ )
427
+ relative_position_bucket = relative_position_bucket.to(self.relative_attention_bias.weight.device)
428
+ values = self.relative_attention_bias(relative_position_bucket)
429
+ values = values.permute([2, 0, 1])
430
+ return values
431
+
432
+ def forward(
433
+ self,
434
+ query,
435
+ key: Optional[Tensor],
436
+ value: Optional[Tensor],
437
+ key_padding_mask: Optional[Tensor] = None,
438
+ incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
439
+ need_weights: bool = True,
440
+ static_kv: bool = False,
441
+ attn_mask: Optional[Tensor] = None,
442
+ before_softmax: bool = False,
443
+ need_head_weights: bool = False,
444
+ position_bias: Optional[Tensor] = None
445
+ ) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
446
+ """Input shape: Time x Batch x Channel
447
+
448
+ Args:
449
+ key_padding_mask (ByteTensor, optional): mask to exclude
450
+ keys that are pads, of shape `(batch, src_len)`, where
451
+ padding elements are indicated by 1s.
452
+ need_weights (bool, optional): return the attention weights,
453
+ averaged over heads (default: False).
454
+ attn_mask (ByteTensor, optional): typically used to
455
+ implement causal attention, where the mask prevents the
456
+ attention from looking forward in time (default: None).
457
+ before_softmax (bool, optional): return the raw attention
458
+ weights and values before the attention softmax.
459
+ need_head_weights (bool, optional): return the attention
460
+ weights for each head. Implies *need_weights*. Default:
461
+ return the average attention weights over all heads.
462
+ """
463
+ if need_head_weights:
464
+ need_weights = True
465
+
466
+ is_tpu = query.device.type == "xla"
467
+
468
+ tgt_len, bsz, embed_dim = query.size()
469
+ src_len = tgt_len
470
+ assert embed_dim == self.embed_dim
471
+ assert list(query.size()) == [tgt_len, bsz, embed_dim]
472
+ if key is not None:
473
+ src_len, key_bsz, _ = key.size()
474
+ if not torch.jit.is_scripting():
475
+ assert key_bsz == bsz
476
+ assert value is not None
477
+ assert (src_len, bsz) == value.shape[:2]
478
+
479
+ if self.has_relative_attention_bias and position_bias is None:
480
+ position_bias = self.compute_bias(tgt_len, src_len)
481
+ position_bias = position_bias.unsqueeze(0).repeat(bsz, 1, 1, 1).view(bsz * self.num_heads, tgt_len, src_len)
482
+
483
+ if incremental_state is not None:
484
+ saved_state = self._get_input_buffer(incremental_state)
485
+ if saved_state is not None and "prev_key" in saved_state:
486
+ # previous time steps are cached - no need to recompute
487
+ # key and value if they are static
488
+ if static_kv:
489
+ assert self.encoder_decoder_attention and not self.self_attention
490
+ key = value = None
491
+ else:
492
+ saved_state = None
493
+
494
+ if self.self_attention:
495
+ q = self.q_proj(query)
496
+ k = self.k_proj(query)
497
+ v = self.v_proj(query)
498
+ elif self.encoder_decoder_attention:
499
+ # encoder-decoder attention
500
+ q = self.q_proj(query)
501
+ if key is None:
502
+ assert value is None
503
+ k = v = None
504
+ else:
505
+ k = self.k_proj(key)
506
+ v = self.v_proj(key)
507
+
508
+ else:
509
+ assert key is not None and value is not None
510
+ q = self.q_proj(query)
511
+ k = self.k_proj(key)
512
+ v = self.v_proj(value)
513
+ q *= self.scaling
514
+ alpha = 32
515
+ q *= 1 / alpha
516
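+        # Stability trick: on top of the 1/sqrt(head_dim) scaling, q is divided
+        # by alpha = 32 before the QK^T matmul; after torch.bmm the logits have
+        # their row-wise max subtracted and are multiplied back by alpha.
+        # softmax(x) == softmax(x - c), so the output is unchanged while the
+        # intermediate logits stay small enough to avoid fp16 overflow.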
+
+        if self.bias_k is not None:
+            assert self.bias_v is not None
+            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
+            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
+            if attn_mask is not None:
+                attn_mask = torch.cat(
+                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
+                )
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [
+                        key_padding_mask,
+                        key_padding_mask.new_zeros(key_padding_mask.size(0), 1),
+                    ],
+                    dim=1,
+                )
+
+        q = (
+            q.contiguous()
+            .view(tgt_len, bsz * self.num_heads, self.q_head_dim)
+            .transpose(0, 1)
+        )
+        if k is not None:
+            k = (
+                k.contiguous()
+                .view(-1, bsz * self.num_heads, self.k_head_dim)
+                .transpose(0, 1)
+            )
+        if v is not None:
+            v = (
+                v.contiguous()
+                .view(-1, bsz * self.num_heads, self.head_dim)
+                .transpose(0, 1)
+            )
+
+        if saved_state is not None:
+            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
+            if "prev_key" in saved_state:
+                _prev_key = saved_state["prev_key"]
+                assert _prev_key is not None
+                prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    k = prev_key
+                else:
+                    assert k is not None
+                    k = torch.cat([prev_key, k], dim=1)
+                src_len = k.size(1)
+            if "prev_value" in saved_state:
+                _prev_value = saved_state["prev_value"]
+                assert _prev_value is not None
+                prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    v = prev_value
+                else:
+                    assert v is not None
+                    v = torch.cat([prev_value, v], dim=1)
+            prev_key_padding_mask: Optional[Tensor] = None
+            if "prev_key_padding_mask" in saved_state:
+                prev_key_padding_mask = saved_state["prev_key_padding_mask"]
+            assert k is not None and v is not None
+            key_padding_mask = MultiheadAttention._append_prev_key_padding_mask(
+                key_padding_mask=key_padding_mask,
+                prev_key_padding_mask=prev_key_padding_mask,
+                batch_size=bsz,
+                src_len=k.size(1),
+                static_kv=static_kv,
+            )
+
+            saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state["prev_key_padding_mask"] = key_padding_mask
+            # In this branch incremental_state is never None
+            assert incremental_state is not None
+            incremental_state = self._set_input_buffer(incremental_state, saved_state)
+        assert k is not None
+        assert k.size(1) == src_len
+
+        # This is part of a workaround to get around fork/join parallelism
+        # not supporting Optional types.
+        if key_padding_mask is not None and key_padding_mask.dim() == 0:
+            key_padding_mask = None
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.size(0) == bsz
+            assert key_padding_mask.size(1) == src_len
+
+        if self.add_zero_attn:
+            assert v is not None
+            src_len += 1
+            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
+            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
+            if attn_mask is not None:
+                attn_mask = torch.cat(
+                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
+                )
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [
+                        key_padding_mask,
+                        torch.zeros(key_padding_mask.size(0), 1).type_as(
+                            key_padding_mask
+                        ),
+                    ],
+                    dim=1,
+                )
+
+        attn_weights = torch.bmm(q, k.transpose(1, 2))
+        # undo the 1/alpha pre-scaling after subtracting the row-wise max;
+        # softmax is shift-invariant, so the result matches the unscaled logits
+        attn_weights = (attn_weights - attn_weights.max(dim=-1, keepdim=True)[0]) * alpha
+        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz)
+
+        assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
+
+        if attn_mask is not None:
+            attn_mask = attn_mask.unsqueeze(0)
+            attn_weights += attn_mask
+
+        if key_padding_mask is not None:
+            # don't attend to padding symbols
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            if not is_tpu:
+                attn_weights = attn_weights.masked_fill(
+                    key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool),
+                    float("-inf"),
+                )
+            else:
+                attn_weights = attn_weights.transpose(0, 2)
+                attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf"))
+                attn_weights = attn_weights.transpose(0, 2)
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        if before_softmax:
+            return attn_weights, v, position_bias
+
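+        # Gated relative position bias: when self.gru_rel_pos == 1, a per-head,
+        # per-query-position gate is computed from q (via self.grep_linear and
+        # self.grep_a, defined in the constructor, not shown here) and rescales
+        # position_bias before it is added to the logits; otherwise the bias is
+        # added as-is.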
+        if position_bias is not None:
+            attn_mask_rel_pos = position_bias
+            if self.gru_rel_pos == 1:
+                query_layer = q.view(bsz, self.num_heads, tgt_len, self.q_head_dim) * alpha / self.scaling
+                _B, _H, _L, __ = query_layer.size()
+                gate_a, gate_b = torch.sigmoid(self.grep_linear(query_layer).view(
+                    _B, _H, _L, 2, 4).sum(-1, keepdim=False)).chunk(2, dim=-1)
+                gate_a_1 = gate_a * (gate_b * self.grep_a - 1.0) + 2.0
+                attn_mask_rel_pos = gate_a_1.view(bsz * self.num_heads, tgt_len, 1) * position_bias
+
+            attn_mask_rel_pos = attn_mask_rel_pos.view(attn_weights.size())
+
+            attn_weights = attn_weights + attn_mask_rel_pos
+
+        attn_weights_float = F.softmax(attn_weights, dim=-1)
+        attn_weights = attn_weights_float.type_as(attn_weights)
+        attn_probs = self.dropout_module(attn_weights)
+
+        assert v is not None
+        attn = torch.bmm(attn_probs, v)
+        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+        attn = self.out_proj(attn)
+        attn_weights: Optional[Tensor] = None
+        if need_weights:
+            attn_weights = attn_weights_float.view(
+                bsz, self.num_heads, tgt_len, src_len
+            ).transpose(1, 0)
+            if not need_head_weights:
+                # average attention weights over heads
+                attn_weights = attn_weights.mean(dim=0)
+
+        return attn, attn_weights, position_bias
+
+    @staticmethod
+    def _append_prev_key_padding_mask(
+        key_padding_mask: Optional[Tensor],
+        prev_key_padding_mask: Optional[Tensor],
+        batch_size: int,
+        src_len: int,
+        static_kv: bool,
+    ) -> Optional[Tensor]:
+        # saved key padding masks have shape (bsz, seq_len)
+        if prev_key_padding_mask is not None and static_kv:
+            new_key_padding_mask = prev_key_padding_mask
+        elif prev_key_padding_mask is not None and key_padding_mask is not None:
+            new_key_padding_mask = torch.cat(
+                [prev_key_padding_mask.float(), key_padding_mask.float()], dim=1
+            )
+        # During incremental decoding, as the padding token enters and
+        # leaves the frame, there will be a time when prev or current
+        # is None
+        elif prev_key_padding_mask is not None:
+            if src_len > prev_key_padding_mask.size(1):
+                filler = torch.zeros(
+                    (batch_size, src_len - prev_key_padding_mask.size(1)),
+                    device=prev_key_padding_mask.device,
+                )
+                new_key_padding_mask = torch.cat(
+                    [prev_key_padding_mask.float(), filler.float()], dim=1
+                )
+            else:
+                new_key_padding_mask = prev_key_padding_mask.float()
+        elif key_padding_mask is not None:
+            if src_len > key_padding_mask.size(1):
+                filler = torch.zeros(
+                    (batch_size, src_len - key_padding_mask.size(1)),
+                    device=key_padding_mask.device,
+                )
+                new_key_padding_mask = torch.cat(
+                    [filler.float(), key_padding_mask.float()], dim=1
+                )
+            else:
+                new_key_padding_mask = key_padding_mask.float()
+        else:
+            new_key_padding_mask = prev_key_padding_mask
+        return new_key_padding_mask
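+
+    # Illustrative behaviour (a sketch): with a cached prefix of length 4 and one
+    # new decoding step, prev_key_padding_mask is (bsz, 4) and key_padding_mask
+    # is (bsz, 1), and the concatenation above returns a (bsz, 5) float mask.
+    # When one side is None, zeros ("not padding") are used as filler so the
+    # result still covers all src_len positions.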
+
+    def _get_input_buffer(
+        self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]]
+    ) -> Dict[str, Optional[Tensor]]:
+        result = self.get_incremental_state(incremental_state, "attn_state")
+        if result is not None:
+            return result
+        else:
+            empty_result: Dict[str, Optional[Tensor]] = {}
+            return empty_result
+
+    def _set_input_buffer(
+        self,
+        incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
+        buffer: Dict[str, Optional[Tensor]],
+    ):
+        return self.set_incremental_state(incremental_state, "attn_state", buffer)
+
+    def apply_sparse_mask(self, attn_weights, tgt_len: int, src_len: int, bsz: int):
+        # identity hook; subclasses can override to apply sparse attention masks
+        return attn_weights
+
+
+def init_bert_params(module):
+    """
+    Initialize the weights specific to the BERT Model.
+    This overrides the default initializations depending on the specified arguments.
+    1. If normal_init_linear_weights is set then the weights of linear
+       layers will be initialized using the normal distribution and the
+       bias will be set to the specified value.
+    2. If normal_init_embed_weights is set then the weights of embedding
+       layers will be initialized using the normal distribution.
+    3. If normal_init_proj_weights is set then the weights of
+       in_project_weight for MultiheadAttention will be initialized using
+       the normal distribution (to be validated).
+    """
+
+    def normal_(data):
+        # with FSDP, module params will be on CUDA, so we cast them back to CPU
+        # so that the RNG is consistent with and without FSDP
+        data.copy_(
+            data.cpu().normal_(mean=0.0, std=0.02).to(data.device)
+        )
+
+    if isinstance(module, nn.Linear):
+        normal_(module.weight.data)
+        if module.bias is not None:
+            module.bias.data.zero_()
+    if isinstance(module, nn.Embedding):
+        normal_(module.weight.data)
+        if module.padding_idx is not None:
+            module.weight.data[module.padding_idx].zero_()
+    if isinstance(module, MultiheadAttention):
+        normal_(module.q_proj.weight.data)
+        normal_(module.k_proj.weight.data)
+        normal_(module.v_proj.weight.data)
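+
+
+# Usage sketch (illustrative; `model` is hypothetical, e.g. an encoder built
+# from the modules in this file): nn.Module.apply() recurses over all
+# submodules, so a single call re-initializes every nn.Linear, nn.Embedding
+# and MultiheadAttention in the tree.
+#
+#     model.apply(init_bert_params)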