anik-owl committed on
Commit
67dea7d
·
verified ·
1 Parent(s): 0c21072

Upload 5 files

Browse files
roberta-large-classifier-v01/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 4096,
16
+ "is_decoder": false,
17
+ "layer_norm_eps": 1e-05,
18
+ "max_position_embeddings": 514,
19
+ "model_type": "roberta",
20
+ "num_attention_heads": 16,
21
+ "num_hidden_layers": 24,
22
+ "pad_token_id": 1,
23
+ "problem_type": "single_label_classification",
24
+ "tie_word_embeddings": true,
25
+ "transformers_version": "5.0.0",
26
+ "type_vocab_size": 1,
27
+ "use_cache": false,
28
+ "vocab_size": 50270
29
+ }
roberta-large-classifier-v01/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ced2983410dd6cfdc639c34ca0373c8b30b4c6f37665baaa8f04a2ed33b300
3
+ size 1421515840
roberta-large-classifier-v01/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56082619687d8922fda778dde804ffb3ae7223abd00de80aa1b754e6cf2b574
3
+ size 1465
roberta-large-classifier-v01/trainer_state.json ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 682,
3
+ "best_metric": 0.632375189107413,
4
+ "best_model_checkpoint": "./roberta-large-normclsf-wtrainer_v01/checkpoint-682",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 682,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.1467351430667645,
14
+ "grad_norm": 100.62623596191406,
15
+ "learning_rate": 1.6333333333333335e-06,
16
+ "loss": 4.063176574707032,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.293470286133529,
21
+ "grad_norm": 96.6983871459961,
22
+ "learning_rate": 3.3000000000000006e-06,
23
+ "loss": 2.3282891845703126,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.4402054292002935,
28
+ "grad_norm": 28.92310905456543,
29
+ "learning_rate": 4.966666666666667e-06,
30
+ "loss": 2.171337890625,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.586940572267058,
35
+ "grad_norm": 81.281982421875,
36
+ "learning_rate": 6.633333333333334e-06,
37
+ "loss": 1.71757568359375,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.7336757153338225,
42
+ "grad_norm": 62.47601318359375,
43
+ "learning_rate": 8.3e-06,
44
+ "loss": 1.4878105163574218,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.880410858400587,
49
+ "grad_norm": 44.91110610961914,
50
+ "learning_rate": 9.966666666666667e-06,
51
+ "loss": 1.4716207885742187,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 1.0,
56
+ "eval_accuracy": 0.9375343973582829,
57
+ "eval_f1": 0.5032822757111597,
58
+ "eval_loss": 0.5080869197845459,
59
+ "eval_macro_f1": 0.7349769182108874,
60
+ "eval_precision": 0.8214285714285714,
61
+ "eval_recall": 0.3627760252365931,
62
+ "eval_runtime": 130.3538,
63
+ "eval_samples_per_second": 27.878,
64
+ "eval_steps_per_second": 1.749,
65
+ "step": 341
66
+ },
67
+ {
68
+ "epoch": 1.0264123257520177,
69
+ "grad_norm": 10.23391056060791,
70
+ "learning_rate": 9.651245551601425e-06,
71
+ "loss": 1.3976800537109375,
72
+ "step": 350
73
+ },
74
+ {
75
+ "epoch": 1.1731474688187822,
76
+ "grad_norm": 169.54437255859375,
77
+ "learning_rate": 9.295373665480429e-06,
78
+ "loss": 1.2954891967773436,
79
+ "step": 400
80
+ },
81
+ {
82
+ "epoch": 1.3198826118855465,
83
+ "grad_norm": 105.23279571533203,
84
+ "learning_rate": 8.939501779359432e-06,
85
+ "loss": 1.096920166015625,
86
+ "step": 450
87
+ },
88
+ {
89
+ "epoch": 1.4666177549523112,
90
+ "grad_norm": 68.01177215576172,
91
+ "learning_rate": 8.583629893238435e-06,
92
+ "loss": 1.396348419189453,
93
+ "step": 500
94
+ },
95
+ {
96
+ "epoch": 1.6133528980190754,
97
+ "grad_norm": 35.087650299072266,
98
+ "learning_rate": 8.227758007117438e-06,
99
+ "loss": 1.2719633483886719,
100
+ "step": 550
101
+ },
102
+ {
103
+ "epoch": 1.7600880410858402,
104
+ "grad_norm": 26.51964569091797,
105
+ "learning_rate": 7.87188612099644e-06,
106
+ "loss": 1.2927476501464843,
107
+ "step": 600
108
+ },
109
+ {
110
+ "epoch": 1.9068231841526044,
111
+ "grad_norm": 26.03403663635254,
112
+ "learning_rate": 7.516014234875445e-06,
113
+ "loss": 1.3671530151367188,
114
+ "step": 650
115
+ },
116
+ {
117
+ "epoch": 2.0,
118
+ "eval_accuracy": 0.9331315354980737,
119
+ "eval_f1": 0.632375189107413,
120
+ "eval_loss": 0.3130079507827759,
121
+ "eval_macro_f1": 0.7977980077518296,
122
+ "eval_precision": 0.6075581395348837,
123
+ "eval_recall": 0.6593059936908517,
124
+ "eval_runtime": 130.5148,
125
+ "eval_samples_per_second": 27.844,
126
+ "eval_steps_per_second": 1.747,
127
+ "step": 682
128
+ }
129
+ ],
130
+ "logging_steps": 50,
131
+ "max_steps": 1705,
132
+ "num_input_tokens_seen": 0,
133
+ "num_train_epochs": 5,
134
+ "save_steps": 500,
135
+ "stateful_callbacks": {
136
+ "EarlyStoppingCallback": {
137
+ "args": {
138
+ "early_stopping_patience": 2,
139
+ "early_stopping_threshold": 0.0
140
+ },
141
+ "attributes": {
142
+ "early_stopping_patience_counter": 0
143
+ }
144
+ },
145
+ "TrainerControl": {
146
+ "args": {
147
+ "should_epoch_stop": false,
148
+ "should_evaluate": false,
149
+ "should_log": false,
150
+ "should_save": true,
151
+ "should_training_stop": false
152
+ },
153
+ "attributes": {}
154
+ }
155
+ },
156
+ "total_flos": 1.0158983791638528e+16,
157
+ "train_batch_size": 8,
158
+ "trial_name": null,
159
+ "trial_params": null
160
+ }
roberta-large-classifier-v01/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f1c6fa212d0e8b7fd77c046d3f2891a6f41114e6733b7358804951d3255250
3
+ size 5201