diffuserconfuser committed on
Commit
83eef69
1 Parent(s): 0c10fab

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "</s>": 2,
3
+ "<mask>": 50264,
4
+ "<pad>": 1,
5
+ "<s>": 0,
6
+ "<unk>": 3
7
+ }
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.34.0",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
cout.txt ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa602398a1ffa4c5da5e93c757e4ea11280062033d3ec5256c85c4c79de31e0c
3
+ size 992619066
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ba528f3103b7a10c948793afbff99655b7d07ece344db03155d9f777ec726d
3
+ size 496295078
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc3901c5e78db8dfecd4a40a0f18caca31434c760459a2305cdf1ab363e99c42
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:989935366fc9ee41649a69d799b25864bc75ed0b6701cad0acf3e049eaa6deec
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "</s>",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "additional_special_tokens": [],
46
+ "bos_token": "<s>",
47
+ "clean_up_tokenization_spaces": true,
48
+ "cls_token": "<s>",
49
+ "eos_token": "</s>",
50
+ "errors": "replace",
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
trainer_state.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8387653827667236,
3
+ "best_model_checkpoint": "finetune_roberta-base/checkpoint-5476",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5476,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.18,
13
+ "learning_rate": 1.879474068663258e-05,
14
+ "loss": 1.7148,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.37,
19
+ "learning_rate": 1.7577307036766496e-05,
20
+ "loss": 1.075,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.55,
25
+ "learning_rate": 1.6359873386900417e-05,
26
+ "loss": 0.9886,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.73,
31
+ "learning_rate": 1.5142439737034334e-05,
32
+ "loss": 0.9376,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.91,
37
+ "learning_rate": 1.3925006087168251e-05,
38
+ "loss": 0.9256,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 1.0,
43
+ "eval_loss": 0.8695176839828491,
44
+ "eval_runtime": 78.4582,
45
+ "eval_samples_per_second": 134.721,
46
+ "eval_steps_per_second": 1.058,
47
+ "step": 2738
48
+ },
49
+ {
50
+ "epoch": 1.1,
51
+ "learning_rate": 1.2707572437302169e-05,
52
+ "loss": 0.8303,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 1.28,
57
+ "learning_rate": 1.1490138787436084e-05,
58
+ "loss": 0.7641,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 1.46,
63
+ "learning_rate": 1.0275140004869735e-05,
64
+ "loss": 0.7347,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 1.64,
69
+ "learning_rate": 9.060141222303384e-06,
70
+ "loss": 0.737,
71
+ "step": 4500
72
+ },
73
+ {
74
+ "epoch": 1.83,
75
+ "learning_rate": 7.842707572437303e-06,
76
+ "loss": 0.7466,
77
+ "step": 5000
78
+ },
79
+ {
80
+ "epoch": 2.0,
81
+ "eval_loss": 0.8387653827667236,
82
+ "eval_runtime": 77.9854,
83
+ "eval_samples_per_second": 135.538,
84
+ "eval_steps_per_second": 1.064,
85
+ "step": 5476
86
+ }
87
+ ],
88
+ "logging_steps": 500,
89
+ "max_steps": 8214,
90
+ "num_train_epochs": 3,
91
+ "save_steps": 500,
92
+ "total_flos": 3.4334001889975296e+16,
93
+ "trial_name": null,
94
+ "trial_params": null
95
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8bc89e2e64fa414dc4c0a103cb82e4fdc230c40d353a9e60416c2ee89c05cfb
3
+ size 4472
vocab.json ADDED
The diff for this file is too large to render. See raw diff