apwic committed on
Commit
92d7dd6
·
verified ·
1 Parent(s): 73699fc

Training in progress, epoch 1

Browse files
Files changed (35) hide show
  1. README.md +68 -0
  2. all_results.json +25 -0
  3. config.json +74 -0
  4. eval_results.json +12 -0
  5. generation_config.json +6 -0
  6. model.safetensors +3 -0
  7. predict_results.json +10 -0
  8. predictions.txt +0 -0
  9. runs/May23_15-28-39_indolem-petl-vm/events.out.tfevents.1716478124.indolem-petl-vm.791594.0 +3 -0
  10. runs/May23_15-28-39_indolem-petl-vm/events.out.tfevents.1716479516.indolem-petl-vm.791594.1 +3 -0
  11. runs/May23_15-58-27_indolem-petl-vm/events.out.tfevents.1716479912.indolem-petl-vm.810287.0 +3 -0
  12. runs/May23_15-58-27_indolem-petl-vm/events.out.tfevents.1716481277.indolem-petl-vm.810287.1 +3 -0
  13. runs/May23_16-45-56_indolem-petl-vm/events.out.tfevents.1716482761.indolem-petl-vm.843792.0 +3 -0
  14. runs/May23_16-48-23_indolem-petl-vm/events.out.tfevents.1716482908.indolem-petl-vm.844581.0 +3 -0
  15. runs/May23_16-55-56_indolem-petl-vm/events.out.tfevents.1716483361.indolem-petl-vm.846656.0 +3 -0
  16. runs/May23_17-13-50_indolem-petl-vm/events.out.tfevents.1716484464.indolem-petl-vm.862477.0 +3 -0
  17. runs/May23_17-20-25_indolem-petl-vm/events.out.tfevents.1716484831.indolem-petl-vm.865169.0 +3 -0
  18. runs/May23_17-24-54_indolem-petl-vm/events.out.tfevents.1716485099.indolem-petl-vm.866978.0 +3 -0
  19. runs/May23_17-28-21_indolem-petl-vm/events.out.tfevents.1716485307.indolem-petl-vm.868628.0 +3 -0
  20. runs/May23_18-19-48_indolem-petl-vm/events.out.tfevents.1716488392.indolem-petl-vm.903361.0 +3 -0
  21. runs/May23_18-19-48_indolem-petl-vm/events.out.tfevents.1716488523.indolem-petl-vm.903361.1 +3 -0
  22. runs/May23_18-42-33_indolem-petl-vm/events.out.tfevents.1716489763.indolem-petl-vm.919895.0 +3 -0
  23. runs/May23_18-42-33_indolem-petl-vm/events.out.tfevents.1716489887.indolem-petl-vm.919895.1 +3 -0
  24. runs/May23_18-50-55_indolem-petl-vm/events.out.tfevents.1716490265.indolem-petl-vm.922188.0 +3 -0
  25. runs/May23_18-50-55_indolem-petl-vm/events.out.tfevents.1716490392.indolem-petl-vm.922188.1 +3 -0
  26. runs/May23_18-55-04_indolem-petl-vm/events.out.tfevents.1716490513.indolem-petl-vm.923382.0 +3 -0
  27. runs/May23_18-55-04_indolem-petl-vm/events.out.tfevents.1716490645.indolem-petl-vm.923382.1 +3 -0
  28. runs/May24_13-16-21_indolem-petl-vm/events.out.tfevents.1716556588.indolem-petl-vm.1421377.0 +3 -0
  29. special_tokens_map.json +7 -0
  30. tokenizer.json +0 -0
  31. tokenizer_config.json +57 -0
  32. train_results.json +8 -0
  33. trainer_state.json +49 -0
  34. training_args.bin +3 -0
  35. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - id
4
+ license: mit
5
+ base_model: indolem/indobert-base-uncased
6
+ tags:
7
+ - generated_from_trainer
8
+ metrics:
9
+ - precision
10
+ - recall
11
+ - f1
12
+ - accuracy
13
+ model-index:
14
+ - name: nerugm-base
15
+ results: []
16
+ ---
17
+
18
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
19
+ should probably proofread and complete it, then remove this comment. -->
20
+
21
+ # nerugm-base
22
+
23
+ This model is a fine-tuned version of [indolem/indobert-base-uncased](https://huggingface.co/indolem/indobert-base-uncased) on an unknown dataset.
24
+ It achieves the following results on the evaluation set:
25
+ - Loss: 0.1575
26
+ - Precision: 0.7765
27
+ - Recall: 0.8884
28
+ - F1: 0.8287
29
+ - Accuracy: 0.9632
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 5e-05
49
+ - train_batch_size: 16
50
+ - eval_batch_size: 64
51
+ - seed: 42
52
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
53
+ - lr_scheduler_type: linear
54
+ - num_epochs: 1.0
55
+
56
+ ### Training results
57
+
58
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
59
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
60
+ | 0.226 | 1.0 | 528 | 0.1575 | 0.7765 | 0.8884 | 0.8287 | 0.9632 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - Transformers 4.39.3
66
+ - Pytorch 2.3.0+cu121
67
+ - Datasets 2.19.1
68
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.963231640663818,
4
+ "eval_f1": 0.8287262872628726,
5
+ "eval_loss": 0.15753257274627686,
6
+ "eval_precision": 0.776536312849162,
7
+ "eval_recall": 0.8884369552585706,
8
+ "eval_runtime": 5.3652,
9
+ "eval_samples": 935,
10
+ "eval_samples_per_second": 174.271,
11
+ "eval_steps_per_second": 2.796,
12
+ "predict_accuracy": 0.9876279863481229,
13
+ "predict_f1": 0.9276586033774533,
14
+ "predict_loss": 0.04439631476998329,
15
+ "predict_precision": 0.9009308510638298,
16
+ "predict_recall": 0.9560206961429916,
17
+ "predict_runtime": 12.7417,
18
+ "predict_samples_per_second": 183.884,
19
+ "predict_steps_per_second": 2.904,
20
+ "train_loss": 0.2260448426911325,
21
+ "train_runtime": 123.2,
22
+ "train_samples": 8437,
23
+ "train_samples_per_second": 68.482,
24
+ "train_steps_per_second": 4.286
25
+ }
config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "indolem/indobert-base-uncased",
3
+ "adapters": {
4
+ "adapters": {},
5
+ "config_map": {},
6
+ "fusion_config_map": {},
7
+ "fusions": {}
8
+ },
9
+ "architectures": [
10
+ "BertAdapterModel"
11
+ ],
12
+ "attention_probs_dropout_prob": 0.1,
13
+ "bos_token_id": 0,
14
+ "classifier_dropout": null,
15
+ "custom_heads": {},
16
+ "eos_token_ids": 0,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 768,
20
+ "id2label": {
21
+ "0": "B-LOCATION",
22
+ "1": "B-ORGANIZATION",
23
+ "2": "B-PERSON",
24
+ "3": "B-QUANTITY",
25
+ "4": "B-TIME",
26
+ "5": "I-LOCATION",
27
+ "6": "I-ORGANIZATION",
28
+ "7": "I-PERSON",
29
+ "8": "I-QUANTITY",
30
+ "9": "I-TIME",
31
+ "10": "O"
32
+ },
33
+ "initializer_range": 0.02,
34
+ "intermediate_size": 3072,
35
+ "label2id": {
36
+ "B-LOCATION": 0,
37
+ "B-ORGANIZATION": 1,
38
+ "B-PERSON": 2,
39
+ "B-QUANTITY": 3,
40
+ "B-TIME": 4,
41
+ "I-LOCATION": 5,
42
+ "I-ORGANIZATION": 6,
43
+ "I-PERSON": 7,
44
+ "I-QUANTITY": 8,
45
+ "I-TIME": 9,
46
+ "O": 10
47
+ },
48
+ "layer_norm_eps": 1e-12,
49
+ "max_position_embeddings": 512,
50
+ "model_type": "bert",
51
+ "num_attention_heads": 12,
52
+ "num_hidden_layers": 12,
53
+ "output_past": true,
54
+ "pad_token_id": 0,
55
+ "position_embedding_type": "absolute",
56
+ "prediction_heads": {
57
+ "default": {
58
+ "activation_function": "gelu",
59
+ "bias": true,
60
+ "embedding_size": 768,
61
+ "head_type": "masked_lm",
62
+ "label2id": null,
63
+ "layer_norm": true,
64
+ "layers": 2,
65
+ "shift_labels": false,
66
+ "vocab_size": 31923
67
+ }
68
+ },
69
+ "torch_dtype": "float32",
70
+ "transformers_version": "4.39.3",
71
+ "type_vocab_size": 2,
72
+ "use_cache": true,
73
+ "vocab_size": 31923
74
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.963231640663818,
4
+ "eval_f1": 0.8287262872628726,
5
+ "eval_loss": 0.15753257274627686,
6
+ "eval_precision": 0.776536312849162,
7
+ "eval_recall": 0.8884369552585706,
8
+ "eval_runtime": 5.3652,
9
+ "eval_samples": 935,
10
+ "eval_samples_per_second": 174.271,
11
+ "eval_steps_per_second": 2.796
12
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.39.3"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5c58575487245194de892f203df202238c2638610190c200e2c4dc16b9a7e6
3
+ size 444752860
predict_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.9876279863481229,
3
+ "predict_f1": 0.9276586033774533,
4
+ "predict_loss": 0.04439631476998329,
5
+ "predict_precision": 0.9009308510638298,
6
+ "predict_recall": 0.9560206961429916,
7
+ "predict_runtime": 12.7417,
8
+ "predict_samples_per_second": 183.884,
9
+ "predict_steps_per_second": 2.904
10
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
runs/May23_15-28-39_indolem-petl-vm/events.out.tfevents.1716478124.indolem-petl-vm.791594.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c1c97b8edb576ed00f8f6193b779ac51518c5f8bb18dcd25361b2cb7158e2e2
3
+ size 19478
runs/May23_15-28-39_indolem-petl-vm/events.out.tfevents.1716479516.indolem-petl-vm.791594.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2f83270c707b1e94704a01c545fa1f27573db269b0e0066fd68d917ffbf4086
3
+ size 560
runs/May23_15-58-27_indolem-petl-vm/events.out.tfevents.1716479912.indolem-petl-vm.810287.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:651943036b483dcd3a0d371b745b25c1c201a557a3e5e76eec93a397af101811
3
+ size 19478
runs/May23_15-58-27_indolem-petl-vm/events.out.tfevents.1716481277.indolem-petl-vm.810287.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851e366ba808fe137d16f22c50c62a98cef020674ea3f18f1868fa2c6af508ef
3
+ size 560
runs/May23_16-45-56_indolem-petl-vm/events.out.tfevents.1716482761.indolem-petl-vm.843792.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67d0b8ecfda12534cd0348fdefc1a87a56fee394dd77b5e28b4ac1a470f6ddf3
3
+ size 5675
runs/May23_16-48-23_indolem-petl-vm/events.out.tfevents.1716482908.indolem-petl-vm.844581.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1af5e987d2c06e9300db5578cfbf6c3def07e8b42a910a059e84d7efb0532d0f
3
+ size 5675
runs/May23_16-55-56_indolem-petl-vm/events.out.tfevents.1716483361.indolem-petl-vm.846656.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:170145ea55da4c6109ba49bfd2ce7ddb3bc8f26dde95f870cdc43fffdd859a12
3
+ size 5675
runs/May23_17-13-50_indolem-petl-vm/events.out.tfevents.1716484464.indolem-petl-vm.862477.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:008ad69d0c18c74dab5cbe9df761563df4fba1d7fa400147a8335aadc0c644fd
3
+ size 5762
runs/May23_17-20-25_indolem-petl-vm/events.out.tfevents.1716484831.indolem-petl-vm.865169.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2c5e79aeb3f8520408582f4d8d887b2cf3c0a5b0200a3855d08be87fd142864
3
+ size 5762
runs/May23_17-24-54_indolem-petl-vm/events.out.tfevents.1716485099.indolem-petl-vm.866978.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ecc1aca1605723c71c4c49da655acc780d4eac0f0edbf244310f99cac52a8a3
3
+ size 5762
runs/May23_17-28-21_indolem-petl-vm/events.out.tfevents.1716485307.indolem-petl-vm.868628.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a7efcd0a0dc2cfee96e915dee992a7d898f1581e9581788f2998ce89d5241c
3
+ size 6588
runs/May23_18-19-48_indolem-petl-vm/events.out.tfevents.1716488392.indolem-petl-vm.903361.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b34c5b657dcdf4d814fdd92f1b667f534b26a74d91931c02f0e5c3d6c8a7194
3
+ size 6502
runs/May23_18-19-48_indolem-petl-vm/events.out.tfevents.1716488523.indolem-petl-vm.903361.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57d4d3d0eac8c3be6fc4fca71b76a5b10e7ccec9b24371d62576bcb305dea293
3
+ size 560
runs/May23_18-42-33_indolem-petl-vm/events.out.tfevents.1716489763.indolem-petl-vm.919895.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9184fb67d1dfc5136f0150871a53be74a05eb8ac4ff1fdc396710e62c6f1465c
3
+ size 6541
runs/May23_18-42-33_indolem-petl-vm/events.out.tfevents.1716489887.indolem-petl-vm.919895.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dc1cf0a507d8a99b8b79ed43d2ec256ce9099724765f8629e364487dab2c1f5
3
+ size 560
runs/May23_18-50-55_indolem-petl-vm/events.out.tfevents.1716490265.indolem-petl-vm.922188.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79d996e5dc18420542098e44dfc96e843644ed7433881839500220b507e12fe1
3
+ size 6548
runs/May23_18-50-55_indolem-petl-vm/events.out.tfevents.1716490392.indolem-petl-vm.922188.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c3c1a468e8ffd000f21d2fe7bb631adb869fc0e100c23cee0698684aab7e8d9
3
+ size 560
runs/May23_18-55-04_indolem-petl-vm/events.out.tfevents.1716490513.indolem-petl-vm.923382.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52a385a6eb2f78ab2a4c15211ca326c7e1432460bac8eac8f1334a04fa600c14
3
+ size 6548
runs/May23_18-55-04_indolem-petl-vm/events.out.tfevents.1716490645.indolem-petl-vm.923382.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cff0cc1d7f0aa62c51e1d30d4ae5088e6c09867ff47f3176d92784e47a471f99
3
+ size 560
runs/May24_13-16-21_indolem-petl-vm/events.out.tfevents.1716556588.indolem-petl-vm.1421377.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291eef8e9bde8d662bd20ad90b96d85f219d9f09c063385aaff191bd34f0f7ff
3
+ size 6194
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[MASK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[SEP]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.2260448426911325,
4
+ "train_runtime": 123.2,
5
+ "train_samples": 8437,
6
+ "train_samples_per_second": 68.482,
7
+ "train_steps_per_second": 4.286
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 528,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.4932644367218018,
14
+ "learning_rate": 0.0,
15
+ "loss": 0.226,
16
+ "step": 528
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.963231640663818,
21
+ "eval_f1": 0.8287262872628726,
22
+ "eval_loss": 0.15753257274627686,
23
+ "eval_precision": 0.776536312849162,
24
+ "eval_recall": 0.8884369552585706,
25
+ "eval_runtime": 5.4397,
26
+ "eval_samples_per_second": 171.884,
27
+ "eval_steps_per_second": 2.757,
28
+ "step": 528
29
+ },
30
+ {
31
+ "epoch": 1.0,
32
+ "step": 528,
33
+ "total_flos": 230610745968444.0,
34
+ "train_loss": 0.2260448426911325,
35
+ "train_runtime": 123.2,
36
+ "train_samples_per_second": 68.482,
37
+ "train_steps_per_second": 4.286
38
+ }
39
+ ],
40
+ "logging_steps": 500,
41
+ "max_steps": 528,
42
+ "num_input_tokens_seen": 0,
43
+ "num_train_epochs": 1,
44
+ "save_steps": 500,
45
+ "total_flos": 230610745968444.0,
46
+ "train_batch_size": 16,
47
+ "trial_name": null,
48
+ "trial_params": null
49
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f51aa7ff629bdc5cec0a627403282f58c5523099102704559db6feff9a94d6
3
+ size 4984
vocab.txt ADDED
The diff for this file is too large to render. See raw diff