raghvendramall committed on
Commit
1153745
·
verified ·
1 Parent(s): 5125604

Initial commit of bert-uncased news classifier model

Browse files
Files changed (7) hide show
  1. config.json +115 -0
  2. model.safetensors +3 -0
  3. optimizer.pt +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +191 -0
  7. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "U.S. NEWS",
14
+ "1": "COMEDY",
15
+ "10": "ENVIRONMENT",
16
+ "11": "EDUCATION",
17
+ "12": "CRIME",
18
+ "13": "SCIENCE",
19
+ "14": "WELLNESS",
20
+ "15": "BUSINESS",
21
+ "16": "STYLE & BEAUTY",
22
+ "17": "FOOD & DRINK",
23
+ "18": "MEDIA",
24
+ "19": "QUEER VOICES",
25
+ "2": "PARENTING",
26
+ "20": "HOME & LIVING",
27
+ "21": "WOMEN",
28
+ "22": "BLACK VOICES",
29
+ "23": "TRAVEL",
30
+ "24": "MONEY",
31
+ "25": "RELIGION",
32
+ "26": "LATINO VOICES",
33
+ "27": "IMPACT",
34
+ "28": "WEDDINGS",
35
+ "29": "COLLEGE",
36
+ "3": "WORLD NEWS",
37
+ "30": "PARENTS",
38
+ "31": "ARTS & CULTURE",
39
+ "32": "STYLE",
40
+ "33": "GREEN",
41
+ "34": "TASTE",
42
+ "35": "HEALTHY LIVING",
43
+ "36": "THE WORLDPOST",
44
+ "37": "GOOD NEWS",
45
+ "38": "WORLDPOST",
46
+ "39": "FIFTY",
47
+ "4": "CULTURE & ARTS",
48
+ "40": "ARTS",
49
+ "41": "DIVORCE",
50
+ "5": "TECH",
51
+ "6": "SPORTS",
52
+ "7": "ENTERTAINMENT",
53
+ "8": "POLITICS",
54
+ "9": "WEIRD NEWS"
55
+ },
56
+ "initializer_range": 0.02,
57
+ "intermediate_size": 3072,
58
+ "label2id": {
59
+ "ARTS": "40",
60
+ "ARTS & CULTURE": "31",
61
+ "BLACK VOICES": "22",
62
+ "BUSINESS": "15",
63
+ "COLLEGE": "29",
64
+ "COMEDY": "1",
65
+ "CRIME": "12",
66
+ "CULTURE & ARTS": "4",
67
+ "DIVORCE": "41",
68
+ "EDUCATION": "11",
69
+ "ENTERTAINMENT": "7",
70
+ "ENVIRONMENT": "10",
71
+ "FIFTY": "39",
72
+ "FOOD & DRINK": "17",
73
+ "GOOD NEWS": "37",
74
+ "GREEN": "33",
75
+ "HEALTHY LIVING": "35",
76
+ "HOME & LIVING": "20",
77
+ "IMPACT": "27",
78
+ "LATINO VOICES": "26",
79
+ "MEDIA": "18",
80
+ "MONEY": "24",
81
+ "PARENTING": "2",
82
+ "PARENTS": "30",
83
+ "POLITICS": "8",
84
+ "QUEER VOICES": "19",
85
+ "RELIGION": "25",
86
+ "SCIENCE": "13",
87
+ "SPORTS": "6",
88
+ "STYLE": "32",
89
+ "STYLE & BEAUTY": "16",
90
+ "TASTE": "34",
91
+ "TECH": "5",
92
+ "THE WORLDPOST": "36",
93
+ "TRAVEL": "23",
94
+ "U.S. NEWS": "0",
95
+ "WEDDINGS": "28",
96
+ "WEIRD NEWS": "9",
97
+ "WELLNESS": "14",
98
+ "WOMEN": "21",
99
+ "WORLD NEWS": "3",
100
+ "WORLDPOST": "38"
101
+ },
102
+ "layer_norm_eps": 1e-12,
103
+ "max_position_embeddings": 512,
104
+ "model_type": "bert",
105
+ "num_attention_heads": 12,
106
+ "num_hidden_layers": 12,
107
+ "pad_token_id": 0,
108
+ "position_embedding_type": "absolute",
109
+ "problem_type": "single_label_classification",
110
+ "torch_dtype": "float32",
111
+ "transformers_version": "4.48.0.dev0",
112
+ "type_vocab_size": 2,
113
+ "use_cache": true,
114
+ "vocab_size": 30522
115
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa6be0aaac658b4e79fab7e42f7f9d4d32c08425baa21ac4fde8e4022dd7384b
3
+ size 438081688
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3555de87341a2dcc8ec06724994441973550924e20a21e3f59d4534a16b5f82
3
+ size 876287627
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466050a5d97bb23ab888aaec723ecc1ca4313d5bb3cdff24cbf1d438a836b6bd
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3513ac14236a591f848e8f70b3bc13614b16e138e093f56d78106ac22054847
3
+ size 1465
trainer_state.json ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7803345268228339,
3
+ "best_model_checkpoint": "../models/bert-uncased-news-classifier/checkpoint-2096",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2096,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09541984732824428,
13
+ "grad_norm": 5.648463249206543,
14
+ "learning_rate": 4.9522900763358784e-05,
15
+ "loss": 2.538,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.19083969465648856,
20
+ "grad_norm": 3.8880655765533447,
21
+ "learning_rate": 4.904580152671756e-05,
22
+ "loss": 1.6729,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.2862595419847328,
27
+ "grad_norm": 5.740947723388672,
28
+ "learning_rate": 4.856870229007634e-05,
29
+ "loss": 1.461,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.3816793893129771,
34
+ "grad_norm": 3.8042280673980713,
35
+ "learning_rate": 4.809160305343512e-05,
36
+ "loss": 1.3513,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.4770992366412214,
41
+ "grad_norm": 3.8541226387023926,
42
+ "learning_rate": 4.7614503816793896e-05,
43
+ "loss": 1.291,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.5725190839694656,
48
+ "grad_norm": 12.443678855895996,
49
+ "learning_rate": 4.713740458015267e-05,
50
+ "loss": 1.2381,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.6679389312977099,
55
+ "grad_norm": 4.856504917144775,
56
+ "learning_rate": 4.666030534351145e-05,
57
+ "loss": 1.1978,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.7633587786259542,
62
+ "grad_norm": 4.257474422454834,
63
+ "learning_rate": 4.618320610687023e-05,
64
+ "loss": 1.1588,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.8587786259541985,
69
+ "grad_norm": 4.104170322418213,
70
+ "learning_rate": 4.570610687022901e-05,
71
+ "loss": 1.1497,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.9541984732824428,
76
+ "grad_norm": 5.3210039138793945,
77
+ "learning_rate": 4.522900763358779e-05,
78
+ "loss": 1.1079,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 1.0,
83
+ "eval_f1": 0.7669080563260455,
84
+ "eval_loss": 1.070055365562439,
85
+ "eval_runtime": 85.1145,
86
+ "eval_samples_per_second": 393.881,
87
+ "eval_steps_per_second": 6.156,
88
+ "step": 1048
89
+ },
90
+ {
91
+ "epoch": 1.049618320610687,
92
+ "grad_norm": 4.200919151306152,
93
+ "learning_rate": 4.475190839694657e-05,
94
+ "loss": 0.9816,
95
+ "step": 1100
96
+ },
97
+ {
98
+ "epoch": 1.1450381679389312,
99
+ "grad_norm": 5.287803649902344,
100
+ "learning_rate": 4.4274809160305345e-05,
101
+ "loss": 0.8863,
102
+ "step": 1200
103
+ },
104
+ {
105
+ "epoch": 1.2404580152671756,
106
+ "grad_norm": 4.714105606079102,
107
+ "learning_rate": 4.379770992366413e-05,
108
+ "loss": 0.8701,
109
+ "step": 1300
110
+ },
111
+ {
112
+ "epoch": 1.33587786259542,
113
+ "grad_norm": 4.039190292358398,
114
+ "learning_rate": 4.332061068702291e-05,
115
+ "loss": 0.8825,
116
+ "step": 1400
117
+ },
118
+ {
119
+ "epoch": 1.4312977099236641,
120
+ "grad_norm": 4.385517120361328,
121
+ "learning_rate": 4.2843511450381676e-05,
122
+ "loss": 0.8781,
123
+ "step": 1500
124
+ },
125
+ {
126
+ "epoch": 1.5267175572519083,
127
+ "grad_norm": 5.384212493896484,
128
+ "learning_rate": 4.236641221374046e-05,
129
+ "loss": 0.8599,
130
+ "step": 1600
131
+ },
132
+ {
133
+ "epoch": 1.6221374045801527,
134
+ "grad_norm": 4.302755832672119,
135
+ "learning_rate": 4.188931297709924e-05,
136
+ "loss": 0.8789,
137
+ "step": 1700
138
+ },
139
+ {
140
+ "epoch": 1.717557251908397,
141
+ "grad_norm": 3.6755897998809814,
142
+ "learning_rate": 4.1412213740458014e-05,
143
+ "loss": 0.8725,
144
+ "step": 1800
145
+ },
146
+ {
147
+ "epoch": 1.8129770992366412,
148
+ "grad_norm": 5.063183784484863,
149
+ "learning_rate": 4.0935114503816795e-05,
150
+ "loss": 0.8711,
151
+ "step": 1900
152
+ },
153
+ {
154
+ "epoch": 1.9083969465648853,
155
+ "grad_norm": 5.043242931365967,
156
+ "learning_rate": 4.0458015267175576e-05,
157
+ "loss": 0.842,
158
+ "step": 2000
159
+ },
160
+ {
161
+ "epoch": 2.0,
162
+ "eval_f1": 0.7803345268228339,
163
+ "eval_loss": 1.0013784170150757,
164
+ "eval_runtime": 86.731,
165
+ "eval_samples_per_second": 386.54,
166
+ "eval_steps_per_second": 6.042,
167
+ "step": 2096
168
+ }
169
+ ],
170
+ "logging_steps": 100,
171
+ "max_steps": 10480,
172
+ "num_input_tokens_seen": 0,
173
+ "num_train_epochs": 10,
174
+ "save_steps": 500,
175
+ "stateful_callbacks": {
176
+ "TrainerControl": {
177
+ "args": {
178
+ "should_epoch_stop": false,
179
+ "should_evaluate": false,
180
+ "should_log": false,
181
+ "should_save": true,
182
+ "should_training_stop": false
183
+ },
184
+ "attributes": {}
185
+ }
186
+ },
187
+ "total_flos": 3.529481144352768e+16,
188
+ "train_batch_size": 128,
189
+ "trial_name": null,
190
+ "trial_params": null
191
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:470cb7c6da2ad69fe3e0b767988b9961419c9788b2b86dd909adb70c6a73be1a
3
+ size 5777