Upload folder using huggingface_hub
Browse files- config.json +1 -1
- model-00001-of-00031.safetensors +1 -1
- model-00002-of-00031.safetensors +1 -1
- model-00003-of-00031.safetensors +1 -1
- model-00004-of-00031.safetensors +1 -1
- model-00005-of-00031.safetensors +1 -1
- model-00006-of-00031.safetensors +1 -1
- model-00007-of-00031.safetensors +1 -1
- model-00008-of-00031.safetensors +1 -1
- model-00009-of-00031.safetensors +1 -1
- model-00010-of-00031.safetensors +1 -1
- model-00011-of-00031.safetensors +1 -1
- model-00012-of-00031.safetensors +1 -1
- model-00013-of-00031.safetensors +1 -1
- model-00014-of-00031.safetensors +1 -1
- model-00015-of-00031.safetensors +1 -1
- model-00016-of-00031.safetensors +1 -1
- model-00017-of-00031.safetensors +1 -1
- model-00018-of-00031.safetensors +1 -1
- model-00019-of-00031.safetensors +1 -1
- model-00020-of-00031.safetensors +1 -1
- model-00021-of-00031.safetensors +1 -1
- model-00022-of-00031.safetensors +1 -1
- model-00023-of-00031.safetensors +1 -1
- model-00024-of-00031.safetensors +1 -1
- model-00025-of-00031.safetensors +1 -1
- model-00026-of-00031.safetensors +1 -1
- model-00027-of-00031.safetensors +1 -1
- model-00028-of-00031.safetensors +1 -1
- model-00029-of-00031.safetensors +1 -1
- model-00030-of-00031.safetensors +1 -1
- sft_args.json +25 -14
config.json
CHANGED
|
@@ -11,7 +11,7 @@
|
|
| 11 |
"initializer_range": 0.02,
|
| 12 |
"intermediate_size": 29568,
|
| 13 |
"max_position_embeddings": 32768,
|
| 14 |
-
"max_window_layers":
|
| 15 |
"model_type": "qwen2",
|
| 16 |
"num_attention_heads": 64,
|
| 17 |
"num_hidden_layers": 80,
|
|
|
|
| 11 |
"initializer_range": 0.02,
|
| 12 |
"intermediate_size": 29568,
|
| 13 |
"max_position_embeddings": 32768,
|
| 14 |
+
"max_window_layers": 80,
|
| 15 |
"model_type": "qwen2",
|
| 16 |
"num_attention_heads": 64,
|
| 17 |
"num_hidden_layers": 80,
|
model-00001-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4548798728
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d189be1ade0a7bdf7f4bd86788f4846fc89b12b28ece8f42e2fc308dd46a7a66
|
| 3 |
size 4548798728
|
model-00002-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4964101384
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78b5bc3946135593a8f831e34003d7dbd375e3063f6047491c47c4ab550c27b6
|
| 3 |
size 4964101384
|
model-00003-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781637328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7862bfe9a14ec5d156363302ef01285449cb52320041370ecb8a1c3a5129aad
|
| 3 |
size 4781637328
|
model-00004-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e53b60a96bd2feac4c98e2944db566ade74c4ebedf771c67c0475f81b754464a
|
| 3 |
size 4781670320
|
model-00005-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f803d2bc0fc3fb2e7143b6d6cb62baa0c0a9b41a73fcbe6db7aa00fba8f35d63
|
| 3 |
size 4781670360
|
model-00006-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4964101416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9894acb53032b6e041aa0cc0f04a7ac469271ce586fc3cb668046c980b4e1c98
|
| 3 |
size 4964101416
|
model-00007-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781637360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26353391e8172671035fb28c0f9eeacb2640861c4810eb71e818b83d385e7661
|
| 3 |
size 4781637360
|
model-00008-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e69ede2230a527aaf5f0682ec5fbe024225072d8872a35cde6fd1f02628a1aaa
|
| 3 |
size 4781670360
|
model-00009-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ff7dba638d6de35a8645ffc99187234da7cf0bfedbcdfb1a574282d9e3b4a5e
|
| 3 |
size 4781670360
|
model-00010-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4964101416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:224ef0f6b77c28660ca872db218a01c205c4b91c21c6f4903adb240f1feaf1f8
|
| 3 |
size 4964101416
|
model-00011-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781637360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:659a24bd9c681cd7cd8eb93cdbbffcc79dea955cd4887f37bd1799ad24e69a8c
|
| 3 |
size 4781637360
|
model-00012-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40484b390a629b6e841385e30f26340222aa8c0093f46d60340580565235dbd9
|
| 3 |
size 4781670360
|
model-00013-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83104b9bb184383b0bf37aab2239213001189737ea57d68811ed9964d7a6dca7
|
| 3 |
size 4781670360
|
model-00014-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4964101416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ce46a4c2dc37bdf983941c0526d69d2b813d735ae0aeb876a6087229dc417e9
|
| 3 |
size 4964101416
|
model-00015-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781637360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4847faed6468d33dcd1788e2710fa69ccb3af19cd8596666083222dac817d24
|
| 3 |
size 4781637360
|
model-00016-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c127df484608133e95f54d275190f407045546131c9592f2572b2441737e7f00
|
| 3 |
size 4781670360
|
model-00017-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c45204750c9e3811f1f1ffae752f5ee6b21ac10b4a7a1e1d86b1dfb50bf7e3d
|
| 3 |
size 4781670360
|
model-00018-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4964101416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c62fcd870f1b623db6cb09c9e79ca81144ec6f185e0498e46aa13bc131d7d89
|
| 3 |
size 4964101416
|
model-00019-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781637360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d409e6527535f1a390bad83d35fd9854e43b362c96f422fe5c0564c3639a6be2
|
| 3 |
size 4781637360
|
model-00020-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31084d34a4ab0bfadba9c2f8291f92c8935eacbb5f300018ae7f215c69ac2d41
|
| 3 |
size 4781670360
|
model-00021-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52d14a8c3d34fd4017dec29421d18f8aae080e7e40a5584f7d426ddde143a7c3
|
| 3 |
size 4781670360
|
model-00022-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4964101416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d960bf183aa80d626d58d8cae7c77a6b3a790e4b5b3780631e60d9337b08a404
|
| 3 |
size 4964101416
|
model-00023-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781637360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18e0b8b591fbe6b65cc551f23cacdf65b704333ff06a60661cb90373812f60d9
|
| 3 |
size 4781637360
|
model-00024-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5908f63dac691fbf0bb05873263cfc42e4256f04ec6751b01052e1a7a10e9139
|
| 3 |
size 4781670360
|
model-00025-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6aa32b5839014fb96b10ddf685c0b7895ce54de4048917f1a15109a3ff497d50
|
| 3 |
size 4781670360
|
model-00026-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4964101416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca4f2b8ce19789b17e2926ed859b17007f4828f5508af163b9c70bd2c7ad6ad9
|
| 3 |
size 4964101416
|
model-00027-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781637360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fe48f82357032630f6bcb1c7e191a9362f1602498709c2889f8cb3f4aaedbf3
|
| 3 |
size 4781637360
|
model-00028-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0868ceb592ef939c9703dca6b5e985abe32a797a5a2fcd08db6260cef94b835
|
| 3 |
size 4781670360
|
model-00029-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4781670360
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bca7d7b0a094b243e47d96ff300a0e3a657df7353000e4221a899c9908f53e6
|
| 3 |
size 4781670360
|
model-00030-of-00031.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3208747032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ade1434e683b8cc9c98732b463dca7b91a9a513a350ffad97e1e7fcb067dd5ca
|
| 3 |
size 3208747032
|
sft_args.json
CHANGED
|
@@ -7,13 +7,13 @@
|
|
| 7 |
"additional_trainable_parameters": [],
|
| 8 |
"tuner_backend": "peft",
|
| 9 |
"template_type": "qwen",
|
| 10 |
-
"output_dir": "/root/swift/
|
| 11 |
"add_output_dir_suffix": true,
|
| 12 |
"ddp_backend": "nccl",
|
| 13 |
"ddp_find_unused_parameters": null,
|
| 14 |
"ddp_broadcast_buffers": null,
|
| 15 |
"seed": 42,
|
| 16 |
-
"resume_from_checkpoint":
|
| 17 |
"resume_only_model": false,
|
| 18 |
"ignore_data_skip": false,
|
| 19 |
"dtype": "bf16",
|
|
@@ -23,6 +23,8 @@
|
|
| 23 |
"dataset/moni_dataset.jsonl",
|
| 24 |
"dataset/moni_dataset.jsonl",
|
| 25 |
"dataset/moni_dataset.jsonl",
|
|
|
|
|
|
|
| 26 |
"dataset/covid_new.jsonl",
|
| 27 |
"dataset/covid_new.jsonl",
|
| 28 |
"dataset/covid_new.jsonl",
|
|
@@ -75,21 +77,18 @@
|
|
| 75 |
"bnb_4bit_quant_type": "nf4",
|
| 76 |
"bnb_4bit_use_double_quant": true,
|
| 77 |
"bnb_4bit_quant_storage": null,
|
| 78 |
-
"lora_target_modules": [
|
| 79 |
-
|
| 80 |
-
"k_proj",
|
| 81 |
-
"v_proj"
|
| 82 |
-
],
|
| 83 |
"lora_rank": 16,
|
| 84 |
"lora_alpha": 32,
|
| 85 |
"lora_dropout_p": 0.06,
|
| 86 |
"lora_bias_trainable": "none",
|
| 87 |
"lora_modules_to_save": [],
|
| 88 |
-
"lora_dtype":
|
| 89 |
"lora_lr_ratio": null,
|
| 90 |
"use_rslora": false,
|
| 91 |
"use_dora": false,
|
| 92 |
-
"init_lora_weights": true,
|
| 93 |
"rope_scaling": null,
|
| 94 |
"boft_block_size": 4,
|
| 95 |
"boft_block_num": 0,
|
|
@@ -117,6 +116,13 @@
|
|
| 117 |
"galore_proj_type": "std",
|
| 118 |
"galore_optim_per_parameter": false,
|
| 119 |
"galore_with_embedding": false,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
"adalora_target_r": 8,
|
| 121 |
"adalora_init_r": 12,
|
| 122 |
"adalora_tinit": 0,
|
|
@@ -140,7 +146,7 @@
|
|
| 140 |
"deepspeed": null,
|
| 141 |
"batch_size": 2,
|
| 142 |
"eval_batch_size": 2,
|
| 143 |
-
"num_train_epochs":
|
| 144 |
"max_steps": -1,
|
| 145 |
"optim": "adamw_torch",
|
| 146 |
"adam_beta1": 0.9,
|
|
@@ -151,13 +157,16 @@
|
|
| 151 |
"gradient_accumulation_steps": 16,
|
| 152 |
"max_grad_norm": 0.5,
|
| 153 |
"predict_with_generate": false,
|
| 154 |
-
"lr_scheduler_type": "
|
|
|
|
| 155 |
"warmup_ratio": 0.03,
|
|
|
|
| 156 |
"eval_steps": 50,
|
| 157 |
"save_steps": 50,
|
| 158 |
"save_only_model": false,
|
| 159 |
"save_total_limit": null,
|
| 160 |
"logging_steps": 5,
|
|
|
|
| 161 |
"dataloader_num_workers": 1,
|
| 162 |
"dataloader_pin_memory": true,
|
| 163 |
"dataloader_drop_last": false,
|
|
@@ -173,7 +182,7 @@
|
|
| 173 |
"use_flash_attn": true,
|
| 174 |
"ignore_args_error": false,
|
| 175 |
"check_model_is_latest": true,
|
| 176 |
-
"logging_dir": "/root/swift/
|
| 177 |
"report_to": [
|
| 178 |
"tensorboard"
|
| 179 |
],
|
|
@@ -188,6 +197,7 @@
|
|
| 188 |
"custom_register_path": null,
|
| 189 |
"custom_dataset_info": "/root/swift/dataset/ds_info.json",
|
| 190 |
"device_map_config_path": null,
|
|
|
|
| 191 |
"max_new_tokens": 2048,
|
| 192 |
"do_sample": true,
|
| 193 |
"temperature": 0.3,
|
|
@@ -219,8 +229,9 @@
|
|
| 219 |
"custom_train_dataset_path": [],
|
| 220 |
"custom_val_dataset_path": [],
|
| 221 |
"use_self_cognition": false,
|
|
|
|
| 222 |
"lora_use_embedding": false,
|
| 223 |
-
"lora_use_all":
|
| 224 |
"lora_m2s_use_embedding": false,
|
| 225 |
"lora_m2s_use_ln": false,
|
| 226 |
"torch_dtype": "torch.bfloat16",
|
|
@@ -230,5 +241,5 @@
|
|
| 230 |
"load_in_4bit": false,
|
| 231 |
"load_in_8bit": false,
|
| 232 |
"train_sampler_random": true,
|
| 233 |
-
"training_args": "Seq2SeqTrainingArguments(output_dir='/root/swift/
|
| 234 |
}
|
|
|
|
| 7 |
"additional_trainable_parameters": [],
|
| 8 |
"tuner_backend": "peft",
|
| 9 |
"template_type": "qwen",
|
| 10 |
+
"output_dir": "/root/swift/output/qwen2-72b-instruct/v6-20240730-130324",
|
| 11 |
"add_output_dir_suffix": true,
|
| 12 |
"ddp_backend": "nccl",
|
| 13 |
"ddp_find_unused_parameters": null,
|
| 14 |
"ddp_broadcast_buffers": null,
|
| 15 |
"seed": 42,
|
| 16 |
+
"resume_from_checkpoint": "/root/swift/output/qwen2-72b-instruct/v5-20240730-113708/checkpoint-700",
|
| 17 |
"resume_only_model": false,
|
| 18 |
"ignore_data_skip": false,
|
| 19 |
"dtype": "bf16",
|
|
|
|
| 23 |
"dataset/moni_dataset.jsonl",
|
| 24 |
"dataset/moni_dataset.jsonl",
|
| 25 |
"dataset/moni_dataset.jsonl",
|
| 26 |
+
"dataset/moni_dataset_en.jsonl",
|
| 27 |
+
"dataset/moni_dataset_en.jsonl",
|
| 28 |
"dataset/covid_new.jsonl",
|
| 29 |
"dataset/covid_new.jsonl",
|
| 30 |
"dataset/covid_new.jsonl",
|
|
|
|
| 77 |
"bnb_4bit_quant_type": "nf4",
|
| 78 |
"bnb_4bit_use_double_quant": true,
|
| 79 |
"bnb_4bit_quant_storage": null,
|
| 80 |
+
"lora_target_modules": [],
|
| 81 |
+
"lora_target_regex": null,
|
|
|
|
|
|
|
|
|
|
| 82 |
"lora_rank": 16,
|
| 83 |
"lora_alpha": 32,
|
| 84 |
"lora_dropout_p": 0.06,
|
| 85 |
"lora_bias_trainable": "none",
|
| 86 |
"lora_modules_to_save": [],
|
| 87 |
+
"lora_dtype": "AUTO",
|
| 88 |
"lora_lr_ratio": null,
|
| 89 |
"use_rslora": false,
|
| 90 |
"use_dora": false,
|
| 91 |
+
"init_lora_weights": "true",
|
| 92 |
"rope_scaling": null,
|
| 93 |
"boft_block_size": 4,
|
| 94 |
"boft_block_num": 0,
|
|
|
|
| 116 |
"galore_proj_type": "std",
|
| 117 |
"galore_optim_per_parameter": false,
|
| 118 |
"galore_with_embedding": false,
|
| 119 |
+
"galore_quantization": false,
|
| 120 |
+
"galore_proj_quant": false,
|
| 121 |
+
"galore_proj_bits": 4,
|
| 122 |
+
"galore_proj_group_size": 256,
|
| 123 |
+
"galore_cos_threshold": 0.4,
|
| 124 |
+
"galore_gamma_proj": 2,
|
| 125 |
+
"galore_queue_size": 5,
|
| 126 |
"adalora_target_r": 8,
|
| 127 |
"adalora_init_r": 12,
|
| 128 |
"adalora_tinit": 0,
|
|
|
|
| 146 |
"deepspeed": null,
|
| 147 |
"batch_size": 2,
|
| 148 |
"eval_batch_size": 2,
|
| 149 |
+
"num_train_epochs": 8,
|
| 150 |
"max_steps": -1,
|
| 151 |
"optim": "adamw_torch",
|
| 152 |
"adam_beta1": 0.9,
|
|
|
|
| 157 |
"gradient_accumulation_steps": 16,
|
| 158 |
"max_grad_norm": 0.5,
|
| 159 |
"predict_with_generate": false,
|
| 160 |
+
"lr_scheduler_type": "cosine",
|
| 161 |
+
"lr_scheduler_kwargs": {},
|
| 162 |
"warmup_ratio": 0.03,
|
| 163 |
+
"warmup_steps": 0,
|
| 164 |
"eval_steps": 50,
|
| 165 |
"save_steps": 50,
|
| 166 |
"save_only_model": false,
|
| 167 |
"save_total_limit": null,
|
| 168 |
"logging_steps": 5,
|
| 169 |
+
"acc_steps": 1,
|
| 170 |
"dataloader_num_workers": 1,
|
| 171 |
"dataloader_pin_memory": true,
|
| 172 |
"dataloader_drop_last": false,
|
|
|
|
| 182 |
"use_flash_attn": true,
|
| 183 |
"ignore_args_error": false,
|
| 184 |
"check_model_is_latest": true,
|
| 185 |
+
"logging_dir": "/root/swift/output/qwen2-72b-instruct/v6-20240730-130324/runs",
|
| 186 |
"report_to": [
|
| 187 |
"tensorboard"
|
| 188 |
],
|
|
|
|
| 197 |
"custom_register_path": null,
|
| 198 |
"custom_dataset_info": "/root/swift/dataset/ds_info.json",
|
| 199 |
"device_map_config_path": null,
|
| 200 |
+
"device_max_memory": [],
|
| 201 |
"max_new_tokens": 2048,
|
| 202 |
"do_sample": true,
|
| 203 |
"temperature": 0.3,
|
|
|
|
| 229 |
"custom_train_dataset_path": [],
|
| 230 |
"custom_val_dataset_path": [],
|
| 231 |
"use_self_cognition": false,
|
| 232 |
+
"is_multimodal": false,
|
| 233 |
"lora_use_embedding": false,
|
| 234 |
+
"lora_use_all": true,
|
| 235 |
"lora_m2s_use_embedding": false,
|
| 236 |
"lora_m2s_use_ln": false,
|
| 237 |
"torch_dtype": "torch.bfloat16",
|
|
|
|
| 241 |
"load_in_4bit": false,
|
| 242 |
"load_in_8bit": false,
|
| 243 |
"train_sampler_random": true,
|
| 244 |
+
"training_args": "Seq2SeqTrainingArguments(output_dir='/root/swift/output/qwen2-72b-instruct/v6-20240730-130324', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=2, per_device_eval_batch_size=2, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=16, eval_accumulation_steps=None, eval_delay=0, learning_rate=0.0001, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=0.5, num_train_epochs=8, max_steps=-1, lr_scheduler_type=<SchedulerType.COSINE: 'cosine'>, lr_scheduler_kwargs={}, warmup_ratio=0.03, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/root/swift/output/qwen2-72b-instruct/v6-20240730-130324/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=<IntervalStrategy.STEPS: 'steps'>, save_steps=50, save_total_limit=None, save_safetensors=True, save_on_each_node=True, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=None, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend='nccl', tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=50, dataloader_num_workers=1, dataloader_prefetch_factor=None, past_index=-1, run_name='/root/swift/output/qwen2-72b-instruct/v6-20240730-130324', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=None, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None), deepspeed=None, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['tensorboard'], ddp_find_unused_parameters=False, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=False, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint='/root/swift/output/qwen2-72b-instruct/v5-20240730-113708/checkpoint-700', hub_model_id=None, hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=False, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy=None, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=False, include_num_input_tokens_seen=False, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, sortish_sampler=True, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=GenerationConfig {\n \"do_sample\": true,\n \"eos_token_id\": 151645,\n \"max_new_tokens\": 2048,\n \"pad_token_id\": 151643,\n \"temperature\": 0.3,\n \"top_k\": 20,\n \"top_p\": 0.7\n}\n, train_sampler_random=True, push_hub_strategy='push_best', acc_strategy='token', additional_saved_files=[], metric_warmup_step=0, train_dataset_sample=17395)"
|
| 245 |
}
|