End of training
Browse files- README.md +161 -0
- all_results.json +7 -0
- config.json +26 -0
- generation_config.json +7 -0
- model.safetensors +3 -0
- runs/Feb11_11-26-36_MacBook-Pro-sunsetsobserver.local/events.out.tfevents.1707647197.MacBook-Pro-sunsetsobserver.local.91650.0 +3 -0
- runs/Feb19_23-08-36_node851/events.out.tfevents.1708380519.node851.11924.0 +3 -0
- runs/Feb20_08-13-24_node857/events.out.tfevents.1708413207.node857.32813.0 +3 -0
- train_results.json +7 -0
- trainer_state.json +0 -0
- training_args.bin +3 -0
README.md
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- generated_from_trainer
|
| 4 |
+
metrics:
|
| 5 |
+
- accuracy
|
| 6 |
+
model-index:
|
| 7 |
+
- name: runs
|
| 8 |
+
results: []
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 12 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 13 |
+
|
| 14 |
+
# runs
|
| 15 |
+
|
| 16 |
+
This model was trained with the Hugging Face `Trainer`; the base checkpoint (if any) and the training dataset were not recorded in the training metadata.
|
| 17 |
+
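It achieves the following results on the evaluation set at the final training step (100000). Note that validation loss reaches its minimum of 4.7416 at step 12000 and rises afterwards while training loss keeps falling (see the training results table), which indicates that the final checkpoint is heavily overfitted: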
|
| 18 |
+
- Loss: 24.0950
|
| 19 |
+
- Accuracy: 0.0013
|
| 20 |
+
|
| 21 |
+
## Model description
|
| 22 |
+
|
| 23 |
+
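A small Mistral-architecture causal language model (`MistralForCausalLM`): 8 hidden layers, hidden size 512, intermediate size 2048, 8 attention heads with 4 key/value heads, sliding window 256, and a vocabulary of 10000 tokens (values taken from `config.json`). More information needed on its purpose and tokenizer.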
|
| 24 |
+
|
| 25 |
+
## Intended uses & limitations
|
| 26 |
+
|
| 27 |
+
More information needed
|
| 28 |
+
|
| 29 |
+
## Training and evaluation data
|
| 30 |
+
|
| 31 |
+
More information needed
|
| 32 |
+
|
| 33 |
+
## Training procedure
|
| 34 |
+
|
| 35 |
+
### Training hyperparameters
|
| 36 |
+
|
| 37 |
+
The following hyperparameters were used during training:
|
| 38 |
+
- learning_rate: 0.0001
|
| 39 |
+
- train_batch_size: 16
|
| 40 |
+
- eval_batch_size: 48
|
| 41 |
+
- seed: 444
|
| 42 |
+
- gradient_accumulation_steps: 3
|
| 43 |
+
- total_train_batch_size: 48
|
| 44 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 45 |
+
- lr_scheduler_type: cosine_with_restarts
|
| 46 |
+
- lr_scheduler_warmup_ratio: 0.3
|
| 47 |
+
- training_steps: 100000
|
| 48 |
+
- mixed_precision_training: Native AMP
|
| 49 |
+
|
| 50 |
+
### Training results
|
| 51 |
+
|
| 52 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
| 53 |
+
|:-------------:|:------:|:------:|:---------------:|:--------:|
|
| 54 |
+
| 8.2359 | 6.04 | 1000 | 8.2170 | 0.0070 |
|
| 55 |
+
| 7.7137 | 12.07 | 2000 | 7.7007 | 0.0064 |
|
| 56 |
+
| 6.5277 | 18.11 | 3000 | 6.5254 | 0.0000 |
|
| 57 |
+
| 6.0375 | 24.14 | 4000 | 6.0532 | 0.0000 |
|
| 58 |
+
| 5.6908 | 30.18 | 5000 | 5.7100 | 0.0001 |
|
| 59 |
+
| 5.4294 | 36.22 | 6000 | 5.4758 | 0.0002 |
|
| 60 |
+
| 5.2161 | 42.25 | 7000 | 5.2891 | 0.0006 |
|
| 61 |
+
| 5.0151 | 48.29 | 8000 | 5.1152 | 0.0021 |
|
| 62 |
+
| 4.8349 | 54.33 | 9000 | 4.9847 | 0.0020 |
|
| 63 |
+
| 4.6358 | 60.36 | 10000 | 4.8754 | 0.0022 |
|
| 64 |
+
| 4.4326 | 66.4 | 11000 | 4.7809 | 0.0021 |
|
| 65 |
+
| 4.2632 | 72.43 | 12000 | 4.7416 | 0.0017 |
|
| 66 |
+
| 4.0415 | 78.47 | 13000 | 4.7503 | 0.0016 |
|
| 67 |
+
| 3.8196 | 84.51 | 14000 | 4.8472 | 0.0014 |
|
| 68 |
+
| 3.6207 | 90.54 | 15000 | 5.0215 | 0.0014 |
|
| 69 |
+
| 3.3163 | 96.58 | 16000 | 5.2939 | 0.0014 |
|
| 70 |
+
| 3.0377 | 102.62 | 17000 | 5.6685 | 0.0014 |
|
| 71 |
+
| 2.7272 | 108.65 | 18000 | 6.1649 | 0.0013 |
|
| 72 |
+
| 2.4319 | 114.69 | 19000 | 6.7556 | 0.0013 |
|
| 73 |
+
| 2.1647 | 120.72 | 20000 | 7.3951 | 0.0013 |
|
| 74 |
+
| 1.9001 | 126.76 | 21000 | 8.0823 | 0.0013 |
|
| 75 |
+
| 1.6708 | 132.8 | 22000 | 8.8230 | 0.0013 |
|
| 76 |
+
| 1.4762 | 138.83 | 23000 | 9.5335 | 0.0013 |
|
| 77 |
+
| 1.2833 | 144.87 | 24000 | 10.1973 | 0.0013 |
|
| 78 |
+
| 1.1451 | 150.91 | 25000 | 10.8213 | 0.0013 |
|
| 79 |
+
| 1.0251 | 156.94 | 26000 | 11.4402 | 0.0013 |
|
| 80 |
+
| 0.9164 | 162.98 | 27000 | 11.9995 | 0.0013 |
|
| 81 |
+
| 0.8174 | 169.01 | 28000 | 12.5680 | 0.0013 |
|
| 82 |
+
| 0.6862 | 175.05 | 29000 | 13.0050 | 0.0013 |
|
| 83 |
+
| 0.5738 | 181.09 | 30000 | 13.4692 | 0.0013 |
|
| 84 |
+
| 0.4524 | 187.12 | 31000 | 13.9220 | 0.0013 |
|
| 85 |
+
| 0.4252 | 193.16 | 32000 | 14.3340 | 0.0013 |
|
| 86 |
+
| 0.3952 | 199.2 | 33000 | 14.7961 | 0.0013 |
|
| 87 |
+
| 0.3684 | 205.23 | 34000 | 15.2421 | 0.0013 |
|
| 88 |
+
| 0.3338 | 211.27 | 35000 | 15.6433 | 0.0013 |
|
| 89 |
+
| 0.307 | 217.3 | 36000 | 16.0182 | 0.0013 |
|
| 90 |
+
| 0.2951 | 223.34 | 37000 | 16.3087 | 0.0013 |
|
| 91 |
+
| 0.28 | 229.38 | 38000 | 16.6556 | 0.0013 |
|
| 92 |
+
| 0.2688 | 235.41 | 39000 | 16.9303 | 0.0013 |
|
| 93 |
+
| 0.2582 | 241.45 | 40000 | 17.2209 | 0.0013 |
|
| 94 |
+
| 0.238 | 247.48 | 41000 | 17.5311 | 0.0013 |
|
| 95 |
+
| 0.2261 | 253.52 | 42000 | 17.7731 | 0.0013 |
|
| 96 |
+
| 0.21 | 259.56 | 43000 | 18.0205 | 0.0013 |
|
| 97 |
+
| 0.2073 | 265.59 | 44000 | 18.2693 | 0.0013 |
|
| 98 |
+
| 0.1976 | 271.63 | 45000 | 18.4634 | 0.0013 |
|
| 99 |
+
| 0.1865 | 277.67 | 46000 | 18.7215 | 0.0012 |
|
| 100 |
+
| 0.1769 | 283.7 | 47000 | 18.9467 | 0.0013 |
|
| 101 |
+
| 0.1649 | 289.74 | 48000 | 19.1423 | 0.0013 |
|
| 102 |
+
| 0.1517 | 295.77 | 49000 | 19.3638 | 0.0013 |
|
| 103 |
+
| 0.1491 | 301.81 | 50000 | 19.5879 | 0.0013 |
|
| 104 |
+
| 0.1387 | 307.85 | 51000 | 19.7823 | 0.0013 |
|
| 105 |
+
| 0.1332 | 313.88 | 52000 | 19.9663 | 0.0013 |
|
| 106 |
+
| 0.1256 | 319.92 | 53000 | 20.1907 | 0.0013 |
|
| 107 |
+
| 0.1154 | 325.96 | 54000 | 20.3939 | 0.0013 |
|
| 108 |
+
| 0.1091 | 331.99 | 55000 | 20.5926 | 0.0013 |
|
| 109 |
+
| 0.0928 | 338.03 | 56000 | 20.8044 | 0.0013 |
|
| 110 |
+
| 0.0812 | 344.06 | 57000 | 20.9873 | 0.0013 |
|
| 111 |
+
| 0.0677 | 350.1 | 58000 | 21.1931 | 0.0013 |
|
| 112 |
+
| 0.0609 | 356.14 | 59000 | 21.3650 | 0.0013 |
|
| 113 |
+
| 0.058 | 362.17 | 60000 | 21.5868 | 0.0013 |
|
| 114 |
+
| 0.0532 | 368.21 | 61000 | 21.7740 | 0.0013 |
|
| 115 |
+
| 0.0481 | 374.25 | 62000 | 21.9339 | 0.0013 |
|
| 116 |
+
| 0.0358 | 380.28 | 63000 | 22.1660 | 0.0012 |
|
| 117 |
+
| 0.0117 | 386.32 | 64000 | 22.4226 | 0.0013 |
|
| 118 |
+
| 0.0768 | 392.35 | 65000 | 22.2193 | 0.0013 |
|
| 119 |
+
| 0.0339 | 398.39 | 66000 | 22.3833 | 0.0013 |
|
| 120 |
+
| 0.0191 | 404.43 | 67000 | 22.5927 | 0.0013 |
|
| 121 |
+
| 0.0493 | 410.46 | 68000 | 22.6069 | 0.0013 |
|
| 122 |
+
| 0.0115 | 416.5 | 69000 | 22.8652 | 0.0012 |
|
| 123 |
+
| 0.0111 | 422.54 | 70000 | 22.9982 | 0.0012 |
|
| 124 |
+
| 0.1182 | 428.57 | 71000 | 22.6628 | 0.0013 |
|
| 125 |
+
| 0.0118 | 434.61 | 72000 | 22.9036 | 0.0013 |
|
| 126 |
+
| 0.0111 | 440.64 | 73000 | 23.0692 | 0.0013 |
|
| 127 |
+
| 0.011 | 446.68 | 74000 | 23.1857 | 0.0013 |
|
| 128 |
+
| 0.0386 | 452.72 | 75000 | 22.9263 | 0.0013 |
|
| 129 |
+
| 0.0109 | 458.75 | 76000 | 23.1548 | 0.0013 |
|
| 130 |
+
| 0.0109 | 464.79 | 77000 | 23.2761 | 0.0012 |
|
| 131 |
+
| 0.0108 | 470.82 | 78000 | 23.3763 | 0.0013 |
|
| 132 |
+
| 0.0131 | 476.86 | 79000 | 23.2048 | 0.0013 |
|
| 133 |
+
| 0.0108 | 482.9 | 80000 | 23.3772 | 0.0013 |
|
| 134 |
+
| 0.0106 | 488.93 | 81000 | 23.4733 | 0.0013 |
|
| 135 |
+
| 0.0106 | 494.97 | 82000 | 23.5654 | 0.0013 |
|
| 136 |
+
| 0.0242 | 501.01 | 83000 | 23.5459 | 0.0013 |
|
| 137 |
+
| 0.0104 | 507.04 | 84000 | 23.5695 | 0.0013 |
|
| 138 |
+
| 0.01 | 513.08 | 85000 | 23.6659 | 0.0013 |
|
| 139 |
+
| 0.0098 | 519.11 | 86000 | 23.7337 | 0.0013 |
|
| 140 |
+
| 0.0097 | 525.15 | 87000 | 23.7961 | 0.0013 |
|
| 141 |
+
| 0.0097 | 531.19 | 88000 | 23.8573 | 0.0013 |
|
| 142 |
+
| 0.0097 | 537.22 | 89000 | 23.9052 | 0.0013 |
|
| 143 |
+
| 0.0097 | 543.26 | 90000 | 23.9524 | 0.0013 |
|
| 144 |
+
| 0.0096 | 549.3 | 91000 | 23.9823 | 0.0013 |
|
| 145 |
+
| 0.0096 | 555.33 | 92000 | 24.0084 | 0.0013 |
|
| 146 |
+
| 0.0095 | 561.37 | 93000 | 24.0364 | 0.0013 |
|
| 147 |
+
| 0.0095 | 567.4 | 94000 | 24.0545 | 0.0013 |
|
| 148 |
+
| 0.0094 | 573.44 | 95000 | 24.0701 | 0.0013 |
|
| 149 |
+
| 0.0094 | 579.48 | 96000 | 24.0826 | 0.0013 |
|
| 150 |
+
| 0.0093 | 585.51 | 97000 | 24.0898 | 0.0013 |
|
| 151 |
+
| 0.0093 | 591.55 | 98000 | 24.0935 | 0.0013 |
|
| 152 |
+
| 0.0093 | 597.59 | 99000 | 24.0944 | 0.0013 |
|
| 153 |
+
| 0.0092 | 603.62 | 100000 | 24.0950 | 0.0013 |
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
### Framework versions
|
| 157 |
+
|
| 158 |
+
- Transformers 4.37.2
|
| 159 |
+
- Pytorch 2.2.0+cu121
|
| 160 |
+
- Datasets 2.17.0
|
| 161 |
+
- Tokenizers 0.15.1
|
all_results.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 603.62,
|
| 3 |
+
"train_loss": 1.1590602387964726,
|
| 4 |
+
"train_runtime": 91564.1404,
|
| 5 |
+
"train_samples_per_second": 52.422,
|
| 6 |
+
"train_steps_per_second": 1.092
|
| 7 |
+
}
|
config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MistralForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 2,
|
| 7 |
+
"eos_token_id": 3,
|
| 8 |
+
"hidden_act": "silu",
|
| 9 |
+
"hidden_size": 512,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"intermediate_size": 2048,
|
| 12 |
+
"max_position_embeddings": 8192,
|
| 13 |
+
"model_type": "mistral",
|
| 14 |
+
"num_attention_heads": 8,
|
| 15 |
+
"num_hidden_layers": 8,
|
| 16 |
+
"num_key_value_heads": 4,
|
| 17 |
+
"pad_token_id": 0,
|
| 18 |
+
"rms_norm_eps": 1e-06,
|
| 19 |
+
"rope_theta": 10000.0,
|
| 20 |
+
"sliding_window": 256,
|
| 21 |
+
"tie_word_embeddings": false,
|
| 22 |
+
"torch_dtype": "float32",
|
| 23 |
+
"transformers_version": "4.37.2",
|
| 24 |
+
"use_cache": true,
|
| 25 |
+
"vocab_size": 10000
|
| 26 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 2,
|
| 4 |
+
"eos_token_id": 3,
|
| 5 |
+
"pad_token_id": 0,
|
| 6 |
+
"transformers_version": "4.37.2"
|
| 7 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c1247e57feefd0721e9dc8cef693affcfbc1e2146251ce42407dbcd57b597e1
|
| 3 |
+
size 166832176
|
runs/Feb11_11-26-36_MacBook-Pro-sunsetsobserver.local/events.out.tfevents.1707647197.MacBook-Pro-sunsetsobserver.local.91650.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8a198491c1df49be35f607fa537dfb5da0acd9fa6894dcc5c1c534655c67e24
|
| 3 |
+
size 130
|
runs/Feb19_23-08-36_node851/events.out.tfevents.1708380519.node851.11924.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b907aa510505c0198599e02846aa8a235ede5498d5bbe16bbc2290130540ffc
|
| 3 |
+
size 8491
|
runs/Feb20_08-13-24_node857/events.out.tfevents.1708413207.node857.32813.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7b69461b4201ecbd6816b6a6e2d3d0de75dd9064117386fdaeedeafb46874b7
|
| 3 |
+
size 834901
|
train_results.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 603.62,
|
| 3 |
+
"train_loss": 1.1590602387964726,
|
| 4 |
+
"train_runtime": 91564.1404,
|
| 5 |
+
"train_samples_per_second": 52.422,
|
| 6 |
+
"train_steps_per_second": 1.092
|
| 7 |
+
}
|
trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66c4d5509c43eef8667a3ddeca37d26f95e56853bbe64a5e1fe0c02098435fbe
|
| 3 |
+
size 4664
|