saroyehun committed on
Commit
087edd6
·
verified ·
1 Parent(s): 88514dc

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": 0.1,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "Nomove",
16
+ "1": "participation management",
17
+ "2": "PressReasoning",
18
+ "3": "PressAccuracy",
19
+ "4": "Uptake (Restating or revoicing)"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 4096,
23
+ "is_decoder": false,
24
+ "label2id": {
25
+ "Nomove": 0,
26
+ "PressAccuracy": 3,
27
+ "PressReasoning": 2,
28
+ "Uptake (Restating or revoicing)": 4,
29
+ "participation management": 1
30
+ },
31
+ "layer_norm_eps": 1e-05,
32
+ "max_position_embeddings": 514,
33
+ "model_type": "roberta",
34
+ "num_attention_heads": 16,
35
+ "num_hidden_layers": 24,
36
+ "pad_token_id": 1,
37
+ "problem_type": "single_label_classification",
38
+ "tie_word_embeddings": true,
39
+ "transformers_version": "5.2.0",
40
+ "type_vocab_size": 1,
41
+ "use_cache": false,
42
+ "vocab_size": 50265
43
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac304968e4505914406f0e683f046c1a0a8f16f8512408e97b73935f235d67c
3
+ size 1421507660
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:694463e9ee07c81ba81cbc05b22ba22f93a94348a788e9691f7e78da071ea16a
3
+ size 2843253758
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec38223b4e1bacb29bb096764307e58cf77809d88d73eb235262daaa76831a57
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab067424c5f7dc2022d597dda27e0f3a7afb728861479487bc1c0cef42a639db
3
+ size 1465
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "errors": "replace",
8
+ "is_local": false,
9
+ "mask_token": "<mask>",
10
+ "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "RobertaTokenizer",
14
+ "trim_offsets": true,
15
+ "unk_token": "<unk>"
16
+ }
trainer_state.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2088,
3
+ "best_metric": 0.8285096878151668,
4
+ "best_model_checkpoint": "./robertalarge_multiclass/checkpoint-2088",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2088,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "grad_norm": 38.4963493347168,
15
+ "learning_rate": 1.996168582375479e-05,
16
+ "loss": 0.7634037061669361,
17
+ "step": 522
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_Nomove_f1": 0.8534048976853404,
22
+ "eval_Nomove_precision": 0.8588791357191087,
23
+ "eval_Nomove_recall": 0.848,
24
+ "eval_Nomove_support": 3000,
25
+ "eval_PressAccuracy_f1": 0.875776397515528,
26
+ "eval_PressAccuracy_precision": 0.8788469029996104,
27
+ "eval_PressAccuracy_recall": 0.8727272727272727,
28
+ "eval_PressAccuracy_support": 2585,
29
+ "eval_PressReasoning_f1": 0.7333333333333333,
30
+ "eval_PressReasoning_precision": 0.6212624584717608,
31
+ "eval_PressReasoning_recall": 0.8947368421052632,
32
+ "eval_PressReasoning_support": 209,
33
+ "eval_Uptake (Restating or revoicing)_f1": 0.597979797979798,
34
+ "eval_Uptake (Restating or revoicing)_precision": 0.5025466893039049,
35
+ "eval_Uptake (Restating or revoicing)_recall": 0.7381546134663342,
36
+ "eval_Uptake (Restating or revoicing)_support": 401,
37
+ "eval_accuracy": 0.8313151900251768,
38
+ "eval_loss": 0.5038919448852539,
39
+ "eval_macro_f1": 0.7744391016253172,
40
+ "eval_macro_precision": 0.7441072454154222,
41
+ "eval_macro_recall": 0.8245914064239864,
42
+ "eval_mcc": 0.7642954946246379,
43
+ "eval_participation management_f1": 0.811701081612586,
44
+ "eval_participation management_precision": 0.8590010405827263,
45
+ "eval_participation management_recall": 0.7693383038210625,
46
+ "eval_participation management_support": 2146,
47
+ "eval_runtime": 7.1557,
48
+ "eval_samples_per_second": 1165.647,
49
+ "eval_steps_per_second": 18.307,
50
+ "eval_weighted_f1": 0.8343200776202896,
51
+ "eval_weighted_precision": 0.8420139024976997,
52
+ "eval_weighted_recall": 0.8313151900251768,
53
+ "step": 522
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "grad_norm": 6.410867214202881,
58
+ "learning_rate": 1.7782034908471692e-05,
59
+ "loss": 0.4319716997987009,
60
+ "step": 1044
61
+ },
62
+ {
63
+ "epoch": 2.0,
64
+ "eval_Nomove_f1": 0.8761967646087818,
65
+ "eval_Nomove_precision": 0.8678875081752779,
66
+ "eval_Nomove_recall": 0.8846666666666667,
67
+ "eval_Nomove_support": 3000,
68
+ "eval_PressAccuracy_f1": 0.8749497386409328,
69
+ "eval_PressAccuracy_precision": 0.9108413562159899,
70
+ "eval_PressAccuracy_recall": 0.8417794970986461,
71
+ "eval_PressAccuracy_support": 2585,
72
+ "eval_PressReasoning_f1": 0.7859078590785908,
73
+ "eval_PressReasoning_precision": 0.90625,
74
+ "eval_PressReasoning_recall": 0.69377990430622,
75
+ "eval_PressReasoning_support": 209,
76
+ "eval_Uptake (Restating or revoicing)_f1": 0.6924101198402132,
77
+ "eval_Uptake (Restating or revoicing)_precision": 0.7428571428571429,
78
+ "eval_Uptake (Restating or revoicing)_recall": 0.6483790523690773,
79
+ "eval_Uptake (Restating or revoicing)_support": 401,
80
+ "eval_accuracy": 0.8515765495743916,
81
+ "eval_loss": 0.4433172047138214,
82
+ "eval_macro_f1": 0.8108377087957347,
83
+ "eval_macro_precision": 0.8422786108456553,
84
+ "eval_macro_recall": 0.7878123568001445,
85
+ "eval_mcc": 0.7898141732587503,
86
+ "eval_participation management_f1": 0.8247240618101546,
87
+ "eval_participation management_precision": 0.7835570469798657,
88
+ "eval_participation management_recall": 0.8704566635601119,
89
+ "eval_participation management_support": 2146,
90
+ "eval_runtime": 7.1565,
91
+ "eval_samples_per_second": 1165.511,
92
+ "eval_steps_per_second": 18.305,
93
+ "eval_weighted_f1": 0.8514691770124805,
94
+ "eval_weighted_precision": 0.8544530412958486,
95
+ "eval_weighted_recall": 0.8515765495743916,
96
+ "step": 1044
97
+ },
98
+ {
99
+ "epoch": 3.0,
100
+ "grad_norm": 15.955626487731934,
101
+ "learning_rate": 1.555981268624947e-05,
102
+ "loss": 0.33474684675077826,
103
+ "step": 1566
104
+ },
105
+ {
106
+ "epoch": 3.0,
107
+ "eval_Nomove_f1": 0.8818949809508034,
108
+ "eval_Nomove_precision": 0.8765228844254198,
109
+ "eval_Nomove_recall": 0.8873333333333333,
110
+ "eval_Nomove_support": 3000,
111
+ "eval_PressAccuracy_f1": 0.895219512195122,
112
+ "eval_PressAccuracy_precision": 0.9031496062992126,
113
+ "eval_PressAccuracy_recall": 0.8874274661508704,
114
+ "eval_PressAccuracy_support": 2585,
115
+ "eval_PressReasoning_f1": 0.7851851851851851,
116
+ "eval_PressReasoning_precision": 0.8112244897959183,
117
+ "eval_PressReasoning_recall": 0.7607655502392344,
118
+ "eval_PressReasoning_support": 209,
119
+ "eval_Uptake (Restating or revoicing)_f1": 0.7187079407806191,
120
+ "eval_Uptake (Restating or revoicing)_precision": 0.7807017543859649,
121
+ "eval_Uptake (Restating or revoicing)_recall": 0.6658354114713217,
122
+ "eval_Uptake (Restating or revoicing)_support": 401,
123
+ "eval_accuracy": 0.8676417695719938,
124
+ "eval_loss": 0.4420914053916931,
125
+ "eval_macro_f1": 0.8259179007665363,
126
+ "eval_macro_precision": 0.8409864136479698,
127
+ "eval_macro_recall": 0.8131521285670041,
128
+ "eval_mcc": 0.8119580097491912,
129
+ "eval_participation management_f1": 0.8485818847209515,
130
+ "eval_participation management_precision": 0.8333333333333334,
131
+ "eval_participation management_recall": 0.8643988816402609,
132
+ "eval_participation management_support": 2146,
133
+ "eval_runtime": 7.1677,
134
+ "eval_samples_per_second": 1163.701,
135
+ "eval_steps_per_second": 18.277,
136
+ "eval_weighted_f1": 0.867184953176441,
137
+ "eval_weighted_precision": 0.8674200984017715,
138
+ "eval_weighted_recall": 0.8676417695719938,
139
+ "step": 1566
140
+ },
141
+ {
142
+ "epoch": 4.0,
143
+ "grad_norm": 36.6058464050293,
144
+ "learning_rate": 1.3337590464027246e-05,
145
+ "loss": 0.2601611257969648,
146
+ "step": 2088
147
+ },
148
+ {
149
+ "epoch": 4.0,
150
+ "eval_Nomove_f1": 0.881498224087827,
151
+ "eval_Nomove_precision": 0.8547276142767689,
152
+ "eval_Nomove_recall": 0.91,
153
+ "eval_Nomove_support": 3000,
154
+ "eval_PressAccuracy_f1": 0.8888455538221529,
155
+ "eval_PressAccuracy_precision": 0.8961856075501377,
156
+ "eval_PressAccuracy_recall": 0.8816247582205029,
157
+ "eval_PressAccuracy_support": 2585,
158
+ "eval_PressReasoning_f1": 0.8066037735849058,
159
+ "eval_PressReasoning_precision": 0.7953488372093023,
160
+ "eval_PressReasoning_recall": 0.8181818181818182,
161
+ "eval_PressReasoning_support": 209,
162
+ "eval_Uptake (Restating or revoicing)_f1": 0.7253613666228647,
163
+ "eval_Uptake (Restating or revoicing)_precision": 0.7666666666666667,
164
+ "eval_Uptake (Restating or revoicing)_recall": 0.6882793017456359,
165
+ "eval_Uptake (Restating or revoicing)_support": 401,
166
+ "eval_accuracy": 0.8644047476321784,
167
+ "eval_loss": 0.45752763748168945,
168
+ "eval_macro_f1": 0.8285096878151668,
169
+ "eval_macro_precision": 0.8354787959044984,
170
+ "eval_macro_recall": 0.8230840908206446,
171
+ "eval_mcc": 0.8074382310097558,
172
+ "eval_participation management_f1": 0.8402395209580839,
173
+ "eval_participation management_precision": 0.8644652538196156,
174
+ "eval_participation management_recall": 0.8173345759552656,
175
+ "eval_participation management_support": 2146,
176
+ "eval_runtime": 7.1558,
177
+ "eval_samples_per_second": 1165.633,
178
+ "eval_steps_per_second": 18.307,
179
+ "eval_weighted_f1": 0.8637770696037415,
180
+ "eval_weighted_precision": 0.8643599464517906,
181
+ "eval_weighted_recall": 0.8644047476321784,
182
+ "step": 2088
183
+ }
184
+ ],
185
+ "logging_steps": 500,
186
+ "max_steps": 5220,
187
+ "num_input_tokens_seen": 0,
188
+ "num_train_epochs": 10,
189
+ "save_steps": 500,
190
+ "stateful_callbacks": {
191
+ "TrainerControl": {
192
+ "args": {
193
+ "should_epoch_stop": false,
194
+ "should_evaluate": false,
195
+ "should_log": false,
196
+ "should_save": true,
197
+ "should_training_stop": false
198
+ },
199
+ "attributes": {}
200
+ }
201
+ },
202
+ "total_flos": 1.1956807869597816e+16,
203
+ "train_batch_size": 64,
204
+ "trial_name": null,
205
+ "trial_params": null
206
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2284e75e30578949fb784db96b4b1ffd5fb26daab6fcaaca58ecfc91092e52c3
3
+ size 5201