Model save
Browse files- README.md +60 -0
- generation_config.json +14 -0
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +48 -0
README.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
license: apache-2.0
|
| 4 |
+
base_model: Qwen/Qwen2.5-7B-Instruct
|
| 5 |
+
tags:
|
| 6 |
+
- llama-factory
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
model-index:
|
| 9 |
+
- name: b2_science_fasttext_pos_scp116k_3k
|
| 10 |
+
results: []
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 14 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 15 |
+
|
| 16 |
+
# b2_science_fasttext_pos_scp116k_3k
|
| 17 |
+
|
| 18 |
+
This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
|
| 19 |
+
|
| 20 |
+
## Model description
|
| 21 |
+
|
| 22 |
+
More information needed
|
| 23 |
+
|
| 24 |
+
## Intended uses & limitations
|
| 25 |
+
|
| 26 |
+
More information needed
|
| 27 |
+
|
| 28 |
+
## Training and evaluation data
|
| 29 |
+
|
| 30 |
+
More information needed
|
| 31 |
+
|
| 32 |
+
## Training procedure
|
| 33 |
+
|
| 34 |
+
### Training hyperparameters
|
| 35 |
+
|
| 36 |
+
The following hyperparameters were used during training:
|
| 37 |
+
- learning_rate: 2e-05
|
| 38 |
+
- train_batch_size: 1
|
| 39 |
+
- eval_batch_size: 8
|
| 40 |
+
- seed: 42
|
| 41 |
+
- distributed_type: multi-GPU
|
| 42 |
+
- num_devices: 4
|
| 43 |
+
- gradient_accumulation_steps: 24
|
| 44 |
+
- total_train_batch_size: 96
|
| 45 |
+
- total_eval_batch_size: 32
|
| 46 |
+
- optimizer: adamw_torch with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
|
| 47 |
+
- lr_scheduler_type: cosine
|
| 48 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 49 |
+
- num_epochs: 7.0
|
| 50 |
+
|
| 51 |
+
### Training results
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
### Framework versions
|
| 56 |
+
|
| 57 |
+
- Transformers 4.46.1
|
| 58 |
+
- PyTorch 2.6.0+cu124
|
| 59 |
+
- Datasets 3.1.0
|
| 60 |
+
- Tokenizers 0.20.3
|
generation_config.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"repetition_penalty": 1.05,
|
| 10 |
+
"temperature": 0.7,
|
| 11 |
+
"top_k": 20,
|
| 12 |
+
"top_p": 0.8,
|
| 13 |
+
"transformers_version": "4.46.1"
|
| 14 |
+
}
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4877660776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:866bf76d95e1ed0d3f661b5aac84754c919ada122d71b009bc22dae8682616b8
|
| 3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4932751008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37e627bf5b0513c7d3cfd3bb027531b86957f9b66b2826613b0403c17ab092ee
|
| 3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4330865200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d07a1d707fde714062e8be1be84c879696ff409632053bfb5c3fb1ce7e28eb1d
|
| 3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1089994880
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e12f73cf4ab94e90b2470dd37edb1e4f7a37bf0af6a248ca38676a99a83e943c
|
| 3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
|
@@ -400,3 +400,51 @@
|
|
| 400 |
{"current_steps": 200, "total_steps": 224, "loss": 0.1206, "lr": 6.953470369291349e-07, "epoch": 6.121518987341772, "percentage": 89.29, "elapsed_time": "2:17:24", "remaining_time": "0:16:29"}
|
| 401 |
{"current_steps": 201, "total_steps": 224, "loss": 0.1172, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:18:07", "remaining_time": "0:15:48"}
|
| 402 |
{"current_steps": 201, "total_steps": 224, "loss": 0.1169, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:17:59", "remaining_time": "0:15:47"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
{"current_steps": 200, "total_steps": 224, "loss": 0.1206, "lr": 6.953470369291349e-07, "epoch": 6.121518987341772, "percentage": 89.29, "elapsed_time": "2:17:24", "remaining_time": "0:16:29"}
|
| 401 |
{"current_steps": 201, "total_steps": 224, "loss": 0.1172, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:18:07", "remaining_time": "0:15:48"}
|
| 402 |
{"current_steps": 201, "total_steps": 224, "loss": 0.1169, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:17:59", "remaining_time": "0:15:47"}
|
| 403 |
+
{"current_steps": 202, "total_steps": 224, "loss": 0.1316, "lr": 5.853823673448877e-07, "epoch": 6.182278481012658, "percentage": 90.18, "elapsed_time": "2:18:53", "remaining_time": "0:15:07"}
|
| 404 |
+
{"current_steps": 202, "total_steps": 224, "loss": 0.1314, "lr": 5.853823673448877e-07, "epoch": 6.182278481012658, "percentage": 90.18, "elapsed_time": "2:18:45", "remaining_time": "0:15:06"}
|
| 405 |
+
{"current_steps": 203, "total_steps": 224, "loss": 0.1565, "lr": 5.33843247095659e-07, "epoch": 6.2126582278481015, "percentage": 90.62, "elapsed_time": "2:19:37", "remaining_time": "0:14:26"}
|
| 406 |
+
{"current_steps": 203, "total_steps": 224, "loss": 0.1561, "lr": 5.33843247095659e-07, "epoch": 6.2126582278481015, "percentage": 90.62, "elapsed_time": "2:19:29", "remaining_time": "0:14:25"}
|
| 407 |
+
{"current_steps": 204, "total_steps": 224, "loss": 0.1398, "lr": 4.846165775385459e-07, "epoch": 6.243037974683545, "percentage": 91.07, "elapsed_time": "2:20:15", "remaining_time": "0:13:45"}
|
| 408 |
+
{"current_steps": 204, "total_steps": 224, "loss": 0.1394, "lr": 4.846165775385459e-07, "epoch": 6.243037974683545, "percentage": 91.07, "elapsed_time": "2:20:07", "remaining_time": "0:13:44"}
|
| 409 |
+
{"current_steps": 205, "total_steps": 224, "loss": 0.1207, "lr": 4.3771438406559173e-07, "epoch": 6.273417721518987, "percentage": 91.52, "elapsed_time": "2:20:57", "remaining_time": "0:13:03"}
|
| 410 |
+
{"current_steps": 205, "total_steps": 224, "loss": 0.1193, "lr": 4.3771438406559173e-07, "epoch": 6.273417721518987, "percentage": 91.52, "elapsed_time": "2:20:49", "remaining_time": "0:13:03"}
|
| 411 |
+
{"current_steps": 206, "total_steps": 224, "loss": 0.1036, "lr": 3.931481242315993e-07, "epoch": 6.30379746835443, "percentage": 91.96, "elapsed_time": "2:21:24", "remaining_time": "0:12:21"}
|
| 412 |
+
{"current_steps": 206, "total_steps": 224, "loss": 0.1037, "lr": 3.931481242315993e-07, "epoch": 6.30379746835443, "percentage": 91.96, "elapsed_time": "2:21:16", "remaining_time": "0:12:20"}
|
| 413 |
+
{"current_steps": 207, "total_steps": 224, "loss": 0.1095, "lr": 3.5092868495520294e-07, "epoch": 6.334177215189873, "percentage": 92.41, "elapsed_time": "2:22:02", "remaining_time": "0:11:39"}
|
| 414 |
+
{"current_steps": 207, "total_steps": 224, "loss": 0.1085, "lr": 3.5092868495520294e-07, "epoch": 6.334177215189873, "percentage": 92.41, "elapsed_time": "2:21:54", "remaining_time": "0:11:39"}
|
| 415 |
+
{"current_steps": 208, "total_steps": 224, "loss": 0.1251, "lr": 3.110663798593616e-07, "epoch": 6.364556962025317, "percentage": 92.86, "elapsed_time": "2:22:42", "remaining_time": "0:10:58"}
|
| 416 |
+
{"current_steps": 208, "total_steps": 224, "loss": 0.1252, "lr": 3.110663798593616e-07, "epoch": 6.364556962025317, "percentage": 92.86, "elapsed_time": "2:22:33", "remaining_time": "0:10:57"}
|
| 417 |
+
{"current_steps": 209, "total_steps": 224, "loss": 0.1068, "lr": 2.735709467518699e-07, "epoch": 6.39493670886076, "percentage": 93.3, "elapsed_time": "2:23:18", "remaining_time": "0:10:17"}
|
| 418 |
+
{"current_steps": 209, "total_steps": 224, "loss": 0.1062, "lr": 2.735709467518699e-07, "epoch": 6.39493670886076, "percentage": 93.3, "elapsed_time": "2:23:10", "remaining_time": "0:10:16"}
|
| 419 |
+
{"current_steps": 210, "total_steps": 224, "loss": 0.1471, "lr": 2.384515452465475e-07, "epoch": 6.425316455696202, "percentage": 93.75, "elapsed_time": "2:24:01", "remaining_time": "0:09:36"}
|
| 420 |
+
{"current_steps": 210, "total_steps": 224, "loss": 0.1469, "lr": 2.384515452465475e-07, "epoch": 6.425316455696202, "percentage": 93.75, "elapsed_time": "2:23:53", "remaining_time": "0:09:35"}
|
| 421 |
+
{"current_steps": 211, "total_steps": 224, "loss": 0.1419, "lr": 2.0571675452567997e-07, "epoch": 6.455696202531645, "percentage": 94.2, "elapsed_time": "2:24:56", "remaining_time": "0:08:55"}
|
| 422 |
+
{"current_steps": 211, "total_steps": 224, "loss": 0.1425, "lr": 2.0571675452567997e-07, "epoch": 6.455696202531645, "percentage": 94.2, "elapsed_time": "2:24:47", "remaining_time": "0:08:55"}
|
| 423 |
+
{"current_steps": 212, "total_steps": 224, "loss": 0.1077, "lr": 1.7537457124423896e-07, "epoch": 6.4860759493670885, "percentage": 94.64, "elapsed_time": "2:25:32", "remaining_time": "0:08:14"}
|
| 424 |
+
{"current_steps": 212, "total_steps": 224, "loss": 0.108, "lr": 1.7537457124423896e-07, "epoch": 6.4860759493670885, "percentage": 94.64, "elapsed_time": "2:25:23", "remaining_time": "0:08:13"}
|
| 425 |
+
{"current_steps": 213, "total_steps": 224, "loss": 0.1311, "lr": 1.474324075764111e-07, "epoch": 6.516455696202532, "percentage": 95.09, "elapsed_time": "2:26:10", "remaining_time": "0:07:32"}
|
| 426 |
+
{"current_steps": 213, "total_steps": 224, "loss": 0.1306, "lr": 1.474324075764111e-07, "epoch": 6.516455696202532, "percentage": 95.09, "elapsed_time": "2:26:01", "remaining_time": "0:07:32"}
|
| 427 |
+
{"current_steps": 214, "total_steps": 224, "loss": 0.1198, "lr": 1.2189708940490653e-07, "epoch": 6.546835443037975, "percentage": 95.54, "elapsed_time": "2:26:44", "remaining_time": "0:06:51"}
|
| 428 |
+
{"current_steps": 214, "total_steps": 224, "loss": 0.1183, "lr": 1.2189708940490653e-07, "epoch": 6.546835443037975, "percentage": 95.54, "elapsed_time": "2:26:34", "remaining_time": "0:06:50"}
|
| 429 |
+
{"current_steps": 215, "total_steps": 224, "loss": 0.1419, "lr": 9.877485465349057e-08, "epoch": 6.577215189873417, "percentage": 95.98, "elapsed_time": "2:27:34", "remaining_time": "0:06:10"}
|
| 430 |
+
{"current_steps": 215, "total_steps": 224, "loss": 0.142, "lr": 9.877485465349057e-08, "epoch": 6.577215189873417, "percentage": 95.98, "elapsed_time": "2:27:25", "remaining_time": "0:06:10"}
|
| 431 |
+
{"current_steps": 216, "total_steps": 224, "loss": 0.1155, "lr": 7.807135176314707e-08, "epoch": 6.6075949367088604, "percentage": 96.43, "elapsed_time": "2:28:09", "remaining_time": "0:05:29"}
|
| 432 |
+
{"current_steps": 216, "total_steps": 224, "loss": 0.1155, "lr": 7.807135176314707e-08, "epoch": 6.6075949367088604, "percentage": 96.43, "elapsed_time": "2:28:00", "remaining_time": "0:05:28"}
|
| 433 |
+
{"current_steps": 217, "total_steps": 224, "loss": 0.1155, "lr": 5.979163831223988e-08, "epoch": 6.637974683544304, "percentage": 96.88, "elapsed_time": "2:28:49", "remaining_time": "0:04:48"}
|
| 434 |
+
{"current_steps": 217, "total_steps": 224, "loss": 0.1156, "lr": 5.979163831223988e-08, "epoch": 6.637974683544304, "percentage": 96.88, "elapsed_time": "2:28:39", "remaining_time": "0:04:47"}
|
| 435 |
+
{"current_steps": 218, "total_steps": 224, "loss": 0.1181, "lr": 4.394017978101905e-08, "epoch": 6.668354430379747, "percentage": 97.32, "elapsed_time": "2:29:20", "remaining_time": "0:04:06"}
|
| 436 |
+
{"current_steps": 218, "total_steps": 224, "loss": 0.1185, "lr": 4.394017978101905e-08, "epoch": 6.668354430379747, "percentage": 97.32, "elapsed_time": "2:29:10", "remaining_time": "0:04:06"}
|
| 437 |
+
{"current_steps": 219, "total_steps": 224, "loss": 0.1148, "lr": 3.0520848460765525e-08, "epoch": 6.69873417721519, "percentage": 97.77, "elapsed_time": "2:30:07", "remaining_time": "0:03:25"}
|
| 438 |
+
{"current_steps": 219, "total_steps": 224, "loss": 0.1143, "lr": 3.0520848460765525e-08, "epoch": 6.69873417721519, "percentage": 97.77, "elapsed_time": "2:29:57", "remaining_time": "0:03:25"}
|
| 439 |
+
{"current_steps": 220, "total_steps": 224, "loss": 0.1485, "lr": 1.9536922507841227e-08, "epoch": 6.729113924050633, "percentage": 98.21, "elapsed_time": "2:30:49", "remaining_time": "0:02:44"}
|
| 440 |
+
{"current_steps": 220, "total_steps": 224, "loss": 0.1482, "lr": 1.9536922507841227e-08, "epoch": 6.729113924050633, "percentage": 98.21, "elapsed_time": "2:30:39", "remaining_time": "0:02:44"}
|
| 441 |
+
{"current_steps": 221, "total_steps": 224, "loss": 0.1266, "lr": 1.099108514288627e-08, "epoch": 6.759493670886076, "percentage": 98.66, "elapsed_time": "2:31:27", "remaining_time": "0:02:03"}
|
| 442 |
+
{"current_steps": 221, "total_steps": 224, "loss": 0.1269, "lr": 1.099108514288627e-08, "epoch": 6.759493670886076, "percentage": 98.66, "elapsed_time": "2:31:17", "remaining_time": "0:02:03"}
|
| 443 |
+
{"current_steps": 222, "total_steps": 224, "loss": 0.133, "lr": 4.885423995341088e-09, "epoch": 6.789873417721519, "percentage": 99.11, "elapsed_time": "2:32:12", "remaining_time": "0:01:22"}
|
| 444 |
+
{"current_steps": 222, "total_steps": 224, "loss": 0.1329, "lr": 4.885423995341088e-09, "epoch": 6.789873417721519, "percentage": 99.11, "elapsed_time": "2:32:02", "remaining_time": "0:01:22"}
|
| 445 |
+
{"current_steps": 223, "total_steps": 224, "loss": 0.0989, "lr": 1.2214305934699078e-09, "epoch": 6.820253164556962, "percentage": 99.55, "elapsed_time": "2:32:46", "remaining_time": "0:00:41"}
|
| 446 |
+
{"current_steps": 223, "total_steps": 224, "loss": 0.0986, "lr": 1.2214305934699078e-09, "epoch": 6.820253164556962, "percentage": 99.55, "elapsed_time": "2:32:36", "remaining_time": "0:00:41"}
|
| 447 |
+
{"current_steps": 224, "total_steps": 224, "loss": 0.1226, "lr": 0.0, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:33:22", "remaining_time": "0:00:00"}
|
| 448 |
+
{"current_steps": 224, "total_steps": 224, "loss": 0.1222, "lr": 0.0, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:33:11", "remaining_time": "0:00:00"}
|
| 449 |
+
{"current_steps": 224, "total_steps": 224, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:35:56", "remaining_time": "0:00:00"}
|
| 450 |
+
{"current_steps": 224, "total_steps": 224, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:36:07", "remaining_time": "0:00:00"}
|