ryanmarten committed on
Commit
b2e1491
·
verified ·
1 Parent(s): a341031

Model save

Browse files
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: Qwen/Qwen2.5-7B-Instruct
5
+ tags:
6
+ - llama-factory
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: b2_science_fasttext_pos_scp116k_3k
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # b2_science_fasttext_pos_scp116k_3k
17
+
18
+ This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 2e-05
38
+ - train_batch_size: 1
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - distributed_type: multi-GPU
42
+ - num_devices: 4
43
+ - gradient_accumulation_steps: 24
44
+ - total_train_batch_size: 96
45
+ - total_eval_batch_size: 32
46
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
47
+ - lr_scheduler_type: cosine
48
+ - lr_scheduler_warmup_ratio: 0.1
49
+ - num_epochs: 7.0
50
+
51
+ ### Training results
52
+
53
+
54
+
55
+ ### Framework versions
56
+
57
+ - Transformers 4.46.1
58
+ - Pytorch 2.6.0+cu124
59
+ - Datasets 3.1.0
60
+ - Tokenizers 0.20.3
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.46.1"
14
+ }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c639c4910b46821f8d847c49a107f20f3883024baa1021548903c6adee8916a
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:866bf76d95e1ed0d3f661b5aac84754c919ada122d71b009bc22dae8682616b8
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb7573cb205b9474ea7c38c439ac95dcfcb6046306d6132c8b514f659ddf6c3
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e627bf5b0513c7d3cfd3bb027531b86957f9b66b2826613b0403c17ab092ee
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1b8331f8e355df574cb0d7f8e4767cc0a630f9da48867df51153db6b3db9b48
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07a1d707fde714062e8be1be84c879696ff409632053bfb5c3fb1ce7e28eb1d
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90df38853cc5cee959aca7ba2738e7bcd1de3fd268399a4a8baf8e2836f402e7
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e12f73cf4ab94e90b2470dd37edb1e4f7a37bf0af6a248ca38676a99a83e943c
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -400,3 +400,51 @@
400
  {"current_steps": 200, "total_steps": 224, "loss": 0.1206, "lr": 6.953470369291349e-07, "epoch": 6.121518987341772, "percentage": 89.29, "elapsed_time": "2:17:24", "remaining_time": "0:16:29"}
401
  {"current_steps": 201, "total_steps": 224, "loss": 0.1172, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:18:07", "remaining_time": "0:15:48"}
402
  {"current_steps": 201, "total_steps": 224, "loss": 0.1169, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:17:59", "remaining_time": "0:15:47"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  {"current_steps": 200, "total_steps": 224, "loss": 0.1206, "lr": 6.953470369291349e-07, "epoch": 6.121518987341772, "percentage": 89.29, "elapsed_time": "2:17:24", "remaining_time": "0:16:29"}
401
  {"current_steps": 201, "total_steps": 224, "loss": 0.1172, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:18:07", "remaining_time": "0:15:48"}
402
  {"current_steps": 201, "total_steps": 224, "loss": 0.1169, "lr": 6.392213479945852e-07, "epoch": 6.151898734177215, "percentage": 89.73, "elapsed_time": "2:17:59", "remaining_time": "0:15:47"}
403
+ {"current_steps": 202, "total_steps": 224, "loss": 0.1316, "lr": 5.853823673448877e-07, "epoch": 6.182278481012658, "percentage": 90.18, "elapsed_time": "2:18:53", "remaining_time": "0:15:07"}
404
+ {"current_steps": 202, "total_steps": 224, "loss": 0.1314, "lr": 5.853823673448877e-07, "epoch": 6.182278481012658, "percentage": 90.18, "elapsed_time": "2:18:45", "remaining_time": "0:15:06"}
405
+ {"current_steps": 203, "total_steps": 224, "loss": 0.1565, "lr": 5.33843247095659e-07, "epoch": 6.2126582278481015, "percentage": 90.62, "elapsed_time": "2:19:37", "remaining_time": "0:14:26"}
406
+ {"current_steps": 203, "total_steps": 224, "loss": 0.1561, "lr": 5.33843247095659e-07, "epoch": 6.2126582278481015, "percentage": 90.62, "elapsed_time": "2:19:29", "remaining_time": "0:14:25"}
407
+ {"current_steps": 204, "total_steps": 224, "loss": 0.1398, "lr": 4.846165775385459e-07, "epoch": 6.243037974683545, "percentage": 91.07, "elapsed_time": "2:20:15", "remaining_time": "0:13:45"}
408
+ {"current_steps": 204, "total_steps": 224, "loss": 0.1394, "lr": 4.846165775385459e-07, "epoch": 6.243037974683545, "percentage": 91.07, "elapsed_time": "2:20:07", "remaining_time": "0:13:44"}
409
+ {"current_steps": 205, "total_steps": 224, "loss": 0.1207, "lr": 4.3771438406559173e-07, "epoch": 6.273417721518987, "percentage": 91.52, "elapsed_time": "2:20:57", "remaining_time": "0:13:03"}
410
+ {"current_steps": 205, "total_steps": 224, "loss": 0.1193, "lr": 4.3771438406559173e-07, "epoch": 6.273417721518987, "percentage": 91.52, "elapsed_time": "2:20:49", "remaining_time": "0:13:03"}
411
+ {"current_steps": 206, "total_steps": 224, "loss": 0.1036, "lr": 3.931481242315993e-07, "epoch": 6.30379746835443, "percentage": 91.96, "elapsed_time": "2:21:24", "remaining_time": "0:12:21"}
412
+ {"current_steps": 206, "total_steps": 224, "loss": 0.1037, "lr": 3.931481242315993e-07, "epoch": 6.30379746835443, "percentage": 91.96, "elapsed_time": "2:21:16", "remaining_time": "0:12:20"}
413
+ {"current_steps": 207, "total_steps": 224, "loss": 0.1095, "lr": 3.5092868495520294e-07, "epoch": 6.334177215189873, "percentage": 92.41, "elapsed_time": "2:22:02", "remaining_time": "0:11:39"}
414
+ {"current_steps": 207, "total_steps": 224, "loss": 0.1085, "lr": 3.5092868495520294e-07, "epoch": 6.334177215189873, "percentage": 92.41, "elapsed_time": "2:21:54", "remaining_time": "0:11:39"}
415
+ {"current_steps": 208, "total_steps": 224, "loss": 0.1251, "lr": 3.110663798593616e-07, "epoch": 6.364556962025317, "percentage": 92.86, "elapsed_time": "2:22:42", "remaining_time": "0:10:58"}
416
+ {"current_steps": 208, "total_steps": 224, "loss": 0.1252, "lr": 3.110663798593616e-07, "epoch": 6.364556962025317, "percentage": 92.86, "elapsed_time": "2:22:33", "remaining_time": "0:10:57"}
417
+ {"current_steps": 209, "total_steps": 224, "loss": 0.1068, "lr": 2.735709467518699e-07, "epoch": 6.39493670886076, "percentage": 93.3, "elapsed_time": "2:23:18", "remaining_time": "0:10:17"}
418
+ {"current_steps": 209, "total_steps": 224, "loss": 0.1062, "lr": 2.735709467518699e-07, "epoch": 6.39493670886076, "percentage": 93.3, "elapsed_time": "2:23:10", "remaining_time": "0:10:16"}
419
+ {"current_steps": 210, "total_steps": 224, "loss": 0.1471, "lr": 2.384515452465475e-07, "epoch": 6.425316455696202, "percentage": 93.75, "elapsed_time": "2:24:01", "remaining_time": "0:09:36"}
420
+ {"current_steps": 210, "total_steps": 224, "loss": 0.1469, "lr": 2.384515452465475e-07, "epoch": 6.425316455696202, "percentage": 93.75, "elapsed_time": "2:23:53", "remaining_time": "0:09:35"}
421
+ {"current_steps": 211, "total_steps": 224, "loss": 0.1419, "lr": 2.0571675452567997e-07, "epoch": 6.455696202531645, "percentage": 94.2, "elapsed_time": "2:24:56", "remaining_time": "0:08:55"}
422
+ {"current_steps": 211, "total_steps": 224, "loss": 0.1425, "lr": 2.0571675452567997e-07, "epoch": 6.455696202531645, "percentage": 94.2, "elapsed_time": "2:24:47", "remaining_time": "0:08:55"}
423
+ {"current_steps": 212, "total_steps": 224, "loss": 0.1077, "lr": 1.7537457124423896e-07, "epoch": 6.4860759493670885, "percentage": 94.64, "elapsed_time": "2:25:32", "remaining_time": "0:08:14"}
424
+ {"current_steps": 212, "total_steps": 224, "loss": 0.108, "lr": 1.7537457124423896e-07, "epoch": 6.4860759493670885, "percentage": 94.64, "elapsed_time": "2:25:23", "remaining_time": "0:08:13"}
425
+ {"current_steps": 213, "total_steps": 224, "loss": 0.1311, "lr": 1.474324075764111e-07, "epoch": 6.516455696202532, "percentage": 95.09, "elapsed_time": "2:26:10", "remaining_time": "0:07:32"}
426
+ {"current_steps": 213, "total_steps": 224, "loss": 0.1306, "lr": 1.474324075764111e-07, "epoch": 6.516455696202532, "percentage": 95.09, "elapsed_time": "2:26:01", "remaining_time": "0:07:32"}
427
+ {"current_steps": 214, "total_steps": 224, "loss": 0.1198, "lr": 1.2189708940490653e-07, "epoch": 6.546835443037975, "percentage": 95.54, "elapsed_time": "2:26:44", "remaining_time": "0:06:51"}
428
+ {"current_steps": 214, "total_steps": 224, "loss": 0.1183, "lr": 1.2189708940490653e-07, "epoch": 6.546835443037975, "percentage": 95.54, "elapsed_time": "2:26:34", "remaining_time": "0:06:50"}
429
+ {"current_steps": 215, "total_steps": 224, "loss": 0.1419, "lr": 9.877485465349057e-08, "epoch": 6.577215189873417, "percentage": 95.98, "elapsed_time": "2:27:34", "remaining_time": "0:06:10"}
430
+ {"current_steps": 215, "total_steps": 224, "loss": 0.142, "lr": 9.877485465349057e-08, "epoch": 6.577215189873417, "percentage": 95.98, "elapsed_time": "2:27:25", "remaining_time": "0:06:10"}
431
+ {"current_steps": 216, "total_steps": 224, "loss": 0.1155, "lr": 7.807135176314707e-08, "epoch": 6.6075949367088604, "percentage": 96.43, "elapsed_time": "2:28:09", "remaining_time": "0:05:29"}
432
+ {"current_steps": 216, "total_steps": 224, "loss": 0.1155, "lr": 7.807135176314707e-08, "epoch": 6.6075949367088604, "percentage": 96.43, "elapsed_time": "2:28:00", "remaining_time": "0:05:28"}
433
+ {"current_steps": 217, "total_steps": 224, "loss": 0.1155, "lr": 5.979163831223988e-08, "epoch": 6.637974683544304, "percentage": 96.88, "elapsed_time": "2:28:49", "remaining_time": "0:04:48"}
434
+ {"current_steps": 217, "total_steps": 224, "loss": 0.1156, "lr": 5.979163831223988e-08, "epoch": 6.637974683544304, "percentage": 96.88, "elapsed_time": "2:28:39", "remaining_time": "0:04:47"}
435
+ {"current_steps": 218, "total_steps": 224, "loss": 0.1181, "lr": 4.394017978101905e-08, "epoch": 6.668354430379747, "percentage": 97.32, "elapsed_time": "2:29:20", "remaining_time": "0:04:06"}
436
+ {"current_steps": 218, "total_steps": 224, "loss": 0.1185, "lr": 4.394017978101905e-08, "epoch": 6.668354430379747, "percentage": 97.32, "elapsed_time": "2:29:10", "remaining_time": "0:04:06"}
437
+ {"current_steps": 219, "total_steps": 224, "loss": 0.1148, "lr": 3.0520848460765525e-08, "epoch": 6.69873417721519, "percentage": 97.77, "elapsed_time": "2:30:07", "remaining_time": "0:03:25"}
438
+ {"current_steps": 219, "total_steps": 224, "loss": 0.1143, "lr": 3.0520848460765525e-08, "epoch": 6.69873417721519, "percentage": 97.77, "elapsed_time": "2:29:57", "remaining_time": "0:03:25"}
439
+ {"current_steps": 220, "total_steps": 224, "loss": 0.1485, "lr": 1.9536922507841227e-08, "epoch": 6.729113924050633, "percentage": 98.21, "elapsed_time": "2:30:49", "remaining_time": "0:02:44"}
440
+ {"current_steps": 220, "total_steps": 224, "loss": 0.1482, "lr": 1.9536922507841227e-08, "epoch": 6.729113924050633, "percentage": 98.21, "elapsed_time": "2:30:39", "remaining_time": "0:02:44"}
441
+ {"current_steps": 221, "total_steps": 224, "loss": 0.1266, "lr": 1.099108514288627e-08, "epoch": 6.759493670886076, "percentage": 98.66, "elapsed_time": "2:31:27", "remaining_time": "0:02:03"}
442
+ {"current_steps": 221, "total_steps": 224, "loss": 0.1269, "lr": 1.099108514288627e-08, "epoch": 6.759493670886076, "percentage": 98.66, "elapsed_time": "2:31:17", "remaining_time": "0:02:03"}
443
+ {"current_steps": 222, "total_steps": 224, "loss": 0.133, "lr": 4.885423995341088e-09, "epoch": 6.789873417721519, "percentage": 99.11, "elapsed_time": "2:32:12", "remaining_time": "0:01:22"}
444
+ {"current_steps": 222, "total_steps": 224, "loss": 0.1329, "lr": 4.885423995341088e-09, "epoch": 6.789873417721519, "percentage": 99.11, "elapsed_time": "2:32:02", "remaining_time": "0:01:22"}
445
+ {"current_steps": 223, "total_steps": 224, "loss": 0.0989, "lr": 1.2214305934699078e-09, "epoch": 6.820253164556962, "percentage": 99.55, "elapsed_time": "2:32:46", "remaining_time": "0:00:41"}
446
+ {"current_steps": 223, "total_steps": 224, "loss": 0.0986, "lr": 1.2214305934699078e-09, "epoch": 6.820253164556962, "percentage": 99.55, "elapsed_time": "2:32:36", "remaining_time": "0:00:41"}
447
+ {"current_steps": 224, "total_steps": 224, "loss": 0.1226, "lr": 0.0, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:33:22", "remaining_time": "0:00:00"}
448
+ {"current_steps": 224, "total_steps": 224, "loss": 0.1222, "lr": 0.0, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:33:11", "remaining_time": "0:00:00"}
449
+ {"current_steps": 224, "total_steps": 224, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:35:56", "remaining_time": "0:00:00"}
450
+ {"current_steps": 224, "total_steps": 224, "epoch": 6.850632911392405, "percentage": 100.0, "elapsed_time": "2:36:07", "remaining_time": "0:00:00"}