mapama247 committed
Commit 4864331 · 1 Parent(s): 7f93820

upload ipc_level1_B model

config.json ADDED
@@ -0,0 +1,109 @@
+ {
+ "_name_or_path": "../models/roberta-large/",
+ "architectures": [
+ "RobertaForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "eos_token_id": 2,
+ "finetuning_task": "ipc1",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 1024,
+ "id2label": {
+ "0": "01",
+ "1": "02",
+ "2": "03",
+ "3": "04",
+ "4": "05",
+ "5": "06",
+ "6": "07",
+ "7": "08",
+ "8": "09",
+ "9": "21",
+ "10": "22",
+ "11": "23",
+ "12": "24",
+ "13": "25",
+ "14": "26",
+ "15": "27",
+ "16": "28",
+ "17": "29",
+ "18": "30",
+ "19": "31",
+ "20": "32",
+ "21": "33",
+ "22": "41",
+ "23": "42",
+ "24": "43",
+ "25": "44",
+ "26": "60",
+ "27": "61",
+ "28": "62",
+ "29": "63",
+ "30": "64",
+ "31": "65",
+ "32": "66",
+ "33": "67",
+ "34": "68",
+ "35": "81",
+ "36": "82",
+ "37": "99"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "label2id": {
+ "01": 0,
+ "02": 1,
+ "03": 2,
+ "04": 3,
+ "05": 4,
+ "06": 5,
+ "07": 6,
+ "08": 7,
+ "09": 8,
+ "21": 9,
+ "22": 10,
+ "23": 11,
+ "24": 12,
+ "25": 13,
+ "26": 14,
+ "27": 15,
+ "28": 16,
+ "29": 17,
+ "30": 18,
+ "31": 19,
+ "32": 20,
+ "33": 21,
+ "41": 22,
+ "42": 23,
+ "43": 24,
+ "44": 25,
+ "60": 26,
+ "61": 27,
+ "62": 28,
+ "63": 29,
+ "64": 30,
+ "65": 31,
+ "66": 32,
+ "67": 33,
+ "68": 34,
+ "81": 35,
+ "82": 36,
+ "99": 37
+ },
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "roberta",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "problem_type": "multi_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.9.2",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 50265
+ }
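
The config declares `problem_type: multi_label_classification` over 38 IPC class labels, so at inference the logits should go through a per-label sigmoid and an independent threshold rather than a softmax/argmax. A minimal usage sketch, assuming the uploaded files sit in a local directory (the `./ipc_level1_B` path and the 0.5 threshold are assumptions, not part of the upload):

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hypothetical local path to this checkpoint; adjust to wherever the files land.
CKPT = "./ipc_level1_B"

tokenizer = AutoTokenizer.from_pretrained(CKPT)
model = AutoModelForSequenceClassification.from_pretrained(CKPT)
model.eval()

text = "A method for encoding video frames using motion compensation."
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 38), one logit per IPC label

# multi_label_classification => independent sigmoid per label, not softmax.
probs = torch.sigmoid(logits)[0]
threshold = 0.5  # assumption; no tuned threshold ships with the checkpoint
predicted = [model.config.id2label[i] for i, p in enumerate(probs) if p > threshold]
print(predicted)  # list of predicted IPC class codes from id2label
```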
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb5808eb38f5ac0aeff73eeb580c6893c4776f5bd0114f0531adcdf219c43f4c
+ size 1421758893
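
The weights are stored as a Git LFS pointer: the repo tracks only the sha256 digest and byte size, while the ~1.4 GB payload lives in LFS storage. A downloaded copy can be checked against the pointer with a short sketch like this (the local filename assumes the file was fetched via git-lfs):

```python
import hashlib

# Values copied from the LFS pointer above.
EXPECTED_OID = "cb5808eb38f5ac0aeff73eeb580c6893c4776f5bd0114f0531adcdf219c43f4c"
EXPECTED_SIZE = 1421758893

h = hashlib.sha256()
size = 0
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size}"
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("pytorch_model.bin matches its LFS pointer")
```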
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a09352109e9ac36d6d9075a8cad7cf9bdeab4cc85d8c706d0500f2115574986
+ size 15523
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "../models/roberta-large/", "tokenizer_class": "RobertaTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,186 @@
+ {
+ "best_metric": 0.7610864260798164,
+ "best_model_checkpoint": "./output//roberta-large_ipc1_B_5_32_5e-6_0.01_0.06_07-07-22_18-34/checkpoint-20000",
+ "epoch": 0.09819252116662534,
+ "global_step": 20000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.01,
+ "learning_rate": 1.6365272890925457e-07,
+ "loss": 0.4942,
+ "step": 2000
+ },
+ {
+ "epoch": 0.01,
+ "eval_accuracy": 0.0,
+ "eval_f1": 3.725980731356789e-05,
+ "eval_loss": 0.242140531539917,
+ "eval_roc_auc": 0.5000018812220105,
+ "eval_runtime": 10105.0715,
+ "eval_samples_per_second": 33.362,
+ "eval_steps_per_second": 2.085,
+ "step": 2000
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 3.2730545781850913e-07,
+ "loss": 0.1998,
+ "step": 4000
+ },
+ {
+ "epoch": 0.02,
+ "eval_accuracy": 0.0,
+ "eval_f1": 0.0,
+ "eval_loss": 0.15237019956111908,
+ "eval_roc_auc": 0.5,
+ "eval_runtime": 10093.7625,
+ "eval_samples_per_second": 33.4,
+ "eval_steps_per_second": 2.088,
+ "step": 4000
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 4.909581867277637e-07,
+ "loss": 0.1391,
+ "step": 6000
+ },
+ {
+ "epoch": 0.03,
+ "eval_accuracy": 0.027651218528274128,
+ "eval_f1": 0.051996127342103765,
+ "eval_loss": 0.11342979222536087,
+ "eval_roc_auc": 0.5133426998907926,
+ "eval_runtime": 10096.0276,
+ "eval_samples_per_second": 33.392,
+ "eval_steps_per_second": 2.087,
+ "step": 6000
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 6.546109156370183e-07,
+ "loss": 0.1014,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04,
+ "eval_accuracy": 0.378618803540495,
+ "eval_f1": 0.5170973118634858,
+ "eval_loss": 0.08398378640413284,
+ "eval_roc_auc": 0.6863826768837573,
+ "eval_runtime": 10090.8157,
+ "eval_samples_per_second": 33.409,
+ "eval_steps_per_second": 2.088,
+ "step": 8000
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 8.182636445462728e-07,
+ "loss": 0.0776,
+ "step": 10000
+ },
+ {
+ "epoch": 0.05,
+ "eval_accuracy": 0.5363778742792056,
+ "eval_f1": 0.6512947171889306,
+ "eval_loss": 0.06587187945842743,
+ "eval_roc_auc": 0.764578950366969,
+ "eval_runtime": 10097.484,
+ "eval_samples_per_second": 33.387,
+ "eval_steps_per_second": 2.087,
+ "step": 10000
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 9.819163734555274e-07,
+ "loss": 0.0634,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06,
+ "eval_accuracy": 0.6100976483709452,
+ "eval_f1": 0.7042497089763607,
+ "eval_loss": 0.05614431947469711,
+ "eval_roc_auc": 0.8049277667745456,
+ "eval_runtime": 10099.2828,
+ "eval_samples_per_second": 33.381,
+ "eval_steps_per_second": 2.086,
+ "step": 12000
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 1.145569102364782e-06,
+ "loss": 0.055,
+ "step": 14000
+ },
+ {
+ "epoch": 0.07,
+ "eval_accuracy": 0.6416227664269951,
+ "eval_f1": 0.7269993152493521,
+ "eval_loss": 0.05014372989535332,
+ "eval_roc_auc": 0.8223509407753063,
+ "eval_runtime": 10095.4543,
+ "eval_samples_per_second": 33.394,
+ "eval_steps_per_second": 2.087,
+ "step": 14000
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 1.3092218312740365e-06,
+ "loss": 0.0502,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08,
+ "eval_accuracy": 0.6483650126954748,
+ "eval_f1": 0.7361196439111947,
+ "eval_loss": 0.04603615403175354,
+ "eval_roc_auc": 0.8236568822502442,
+ "eval_runtime": 10103.5894,
+ "eval_samples_per_second": 33.367,
+ "eval_steps_per_second": 2.085,
+ "step": 16000
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 1.472874560183291e-06,
+ "loss": 0.0464,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09,
+ "eval_accuracy": 0.6700867326356754,
+ "eval_f1": 0.7512896154078634,
+ "eval_loss": 0.043166279792785645,
+ "eval_roc_auc": 0.8373107879447877,
+ "eval_runtime": 10097.7203,
+ "eval_samples_per_second": 33.387,
+ "eval_steps_per_second": 2.087,
+ "step": 18000
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 1.6365272890925457e-06,
+ "loss": 0.0441,
+ "step": 20000
+ },
+ {
+ "epoch": 0.1,
+ "eval_accuracy": 0.6828178021404333,
+ "eval_f1": 0.7610864260798164,
+ "eval_loss": 0.0410546213388443,
+ "eval_roc_auc": 0.8459980425262335,
+ "eval_runtime": 10102.9844,
+ "eval_samples_per_second": 33.369,
+ "eval_steps_per_second": 2.086,
+ "step": 20000
+ }
+ ],
+ "max_steps": 1018405,
+ "num_train_epochs": 5,
+ "total_flos": 5.9650862088192e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
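
The trainer state logs metrics every 2,000 steps: eval_f1 climbs from near zero to 0.761 within the first 0.1 epoch, and best_model_checkpoint points at step 20000. The checkpoint directory name plausibly encodes the run's hyperparameters (5 epochs, batch size 32, lr 5e-6, weight decay 0.01, warmup ratio 0.06, plus a timestamp), which is consistent with `num_train_epochs: 5` and the linearly increasing learning_rate in the log, though the upload does not confirm this reading. The exact compute_metrics function is also not part of the upload; a plausible sketch for a multi-label head, assuming micro averaging and a 0.5 threshold:

```python
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    probs = 1 / (1 + np.exp(-logits))   # sigmoid: one probability per label
    preds = (probs > 0.5).astype(int)   # assumed decision threshold
    return {
        "accuracy": accuracy_score(labels, preds),  # exact-match (subset) accuracy
        "f1": f1_score(labels, preds, average="micro"),
        "roc_auc": roc_auc_score(labels, probs, average="micro"),
    }
```

Subset accuracy would explain the early eval_accuracy of 0.0: every label of a sample must be correct before it counts, which is rare while the model still predicts almost nothing above threshold.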
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f78d41f00a28720c117b2e2c2e257914d9398ad8582ef388ea96f41fc0ecf6a3
+ size 2735
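
training_args.bin is a small (2.7 KB) pickled TrainingArguments object written by the Trainer (here under transformers 4.9.2). As a sketch, it can be inspected by unpickling it with torch, provided a compatible transformers is importable:

```python
import torch

# Unpickles a transformers.TrainingArguments instance; on recent torch,
# weights_only=False is required because this is arbitrary pickled data.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```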
vocab.json ADDED
The diff for this file is too large to render. See raw diff