avsolatorio commited on
Commit
f0397c3
·
verified ·
1 Parent(s): 9953d8d

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -8,184 +8,184 @@
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "Global Financing Facility for Women, Children and Adolescents",
12
- "1": "Competitiveness",
13
- "2": "Sustainable Communities",
14
- "3": "Education",
15
- "4": "Natural Capital",
16
- "5": "Regional Integration",
17
- "6": "Jobs and Development",
18
- "7": "Innovation and Entrepreneurship",
19
- "8": "Inclusive Cities",
20
- "9": "Jobs & Development",
21
- "10": "Transport",
22
- "11": "Sustainable Infrastructure Finance",
23
- "12": "Pandemic Preparedness and COVID-19",
24
- "13": "Taxes and Government Revenue",
25
- "14": "Labor Markets",
26
- "15": "Measuring Poverty",
27
- "16": "Food Security Update",
28
- "17": "Disability Inclusion",
29
- "18": "Agriculture and Food",
30
- "19": "Trade Facilitation and Logistics",
31
- "20": "Social Protection",
32
- "21": "One Health",
33
- "22": "Biodiversity",
34
- "23": "Social Inclusion",
35
- "24": "Credit Infrastructure",
36
- "25": "Water Supply",
37
- "26": "Early Childhood Development",
38
- "27": "Food System Jobs",
39
- "28": "Migration",
40
- "29": "Indigenous Peoples",
41
- "30": "Universal Health Coverage",
42
- "31": "Financial Sector",
43
- "32": "Procurement for Development",
44
- "33": "Inequality and Shared Prosperity",
45
- "34": "COVID-19 Hub",
46
- "35": "Poverty",
47
- "36": "Financial Stability",
48
- "37": "Digital Development",
49
- "38": "Long-Term Finance",
50
- "39": "Gas Flaring Reduction",
51
- "40": "Mining Investment and Governance Review",
52
- "41": "Small and Medium Enterprises Finance",
53
- "42": "Infrastructure",
54
- "43": "Health",
55
- "44": "Sexual Orientation and Gender Identity",
56
- "45": "Nutrition",
57
- "46": "Financial Inclusion",
58
- "47": "Fragility, Conflict, and Violence",
59
- "48": "Debt Relief",
60
- "49": "Disaster Risk Management",
61
- "50": "Water in Agriculture",
62
- "51": "Livestock and Sustainability",
63
- "52": "Global Value Chains",
64
- "53": "Competition Policy",
65
- "54": "Pollution",
66
- "55": "Urban Development",
67
- "56": "Gender",
68
- "57": "Safety Nets and Cash Transfers",
69
- "58": "Forests and Landscapes",
70
- "59": "Water Resources Management",
71
- "60": "Extractive Industries",
72
- "61": "Social Sustainability and Inclusion",
73
- "62": "Energy",
74
- "63": "Girls' Education",
75
- "64": "Environment",
76
- "65": "Marine Plastic Pollution",
77
- "66": "Education and Technology",
78
- "67": "Financial Integrity",
79
- "68": "Oceans, Fisheries, and Coastal Economies",
80
- "69": "Sanitation",
81
- "70": "Land",
82
- "71": "Higher Education",
83
- "72": "Teachers",
84
- "73": "Investment Climate",
85
- "74": "Debt",
86
- "75": "Climate Change",
87
- "76": "Trade",
88
- "77": "Skills Development",
89
- "78": "Agribusiness and Value Chains",
90
- "79": "Climate-Smart Agriculture",
91
- "80": "Pensions",
92
- "81": "Infectious diseases and Vaccines",
93
- "82": "Payment Systems",
94
- "83": "Community-Driven Development",
95
- "84": "Water",
96
- "85": "Governance",
97
- "86": "Macroeconomics"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
- "Agribusiness and Value Chains": 78,
103
- "Agriculture and Food": 18,
104
- "Biodiversity": 22,
105
- "COVID-19 Hub": 34,
106
- "Climate Change": 75,
107
- "Climate-Smart Agriculture": 79,
108
- "Community-Driven Development": 83,
109
- "Competition Policy": 53,
110
- "Competitiveness": 1,
111
- "Credit Infrastructure": 24,
112
- "Debt": 74,
113
- "Debt Relief": 48,
114
- "Digital Development": 37,
115
- "Disability Inclusion": 17,
116
- "Disaster Risk Management": 49,
117
- "Early Childhood Development": 26,
118
- "Education": 3,
119
- "Education and Technology": 66,
120
- "Energy": 62,
121
- "Environment": 64,
122
- "Extractive Industries": 60,
123
- "Financial Inclusion": 46,
124
- "Financial Integrity": 67,
125
- "Financial Sector": 31,
126
- "Financial Stability": 36,
127
- "Food Security Update": 16,
128
- "Food System Jobs": 27,
129
- "Forests and Landscapes": 58,
130
- "Fragility, Conflict, and Violence": 47,
131
- "Gas Flaring Reduction": 39,
132
- "Gender": 56,
133
- "Girls' Education": 63,
134
- "Global Financing Facility for Women, Children and Adolescents": 0,
135
- "Global Value Chains": 52,
136
- "Governance": 85,
137
- "Health": 43,
138
- "Higher Education": 71,
139
- "Inclusive Cities": 8,
140
- "Indigenous Peoples": 29,
141
- "Inequality and Shared Prosperity": 33,
142
- "Infectious diseases and Vaccines": 81,
143
- "Infrastructure": 42,
144
- "Innovation and Entrepreneurship": 7,
145
- "Investment Climate": 73,
146
- "Jobs & Development": 9,
147
- "Jobs and Development": 6,
148
- "Labor Markets": 14,
149
- "Land": 70,
150
- "Livestock and Sustainability": 51,
151
- "Long-Term Finance": 38,
152
- "Macroeconomics": 86,
153
- "Marine Plastic Pollution": 65,
154
- "Measuring Poverty": 15,
155
- "Migration": 28,
156
- "Mining Investment and Governance Review": 40,
157
- "Natural Capital": 4,
158
- "Nutrition": 45,
159
- "Oceans, Fisheries, and Coastal Economies": 68,
160
- "One Health": 21,
161
- "Pandemic Preparedness and COVID-19": 12,
162
- "Payment Systems": 82,
163
- "Pensions": 80,
164
- "Pollution": 54,
165
- "Poverty": 35,
166
- "Procurement for Development": 32,
167
- "Regional Integration": 5,
168
- "Safety Nets and Cash Transfers": 57,
169
- "Sanitation": 69,
170
- "Sexual Orientation and Gender Identity": 44,
171
- "Skills Development": 77,
172
- "Small and Medium Enterprises Finance": 41,
173
- "Social Inclusion": 23,
174
- "Social Protection": 20,
175
- "Social Sustainability and Inclusion": 61,
176
- "Sustainable Communities": 2,
177
- "Sustainable Infrastructure Finance": 11,
178
- "Taxes and Government Revenue": 13,
179
- "Teachers": 72,
180
- "Trade": 76,
181
- "Trade Facilitation and Logistics": 19,
182
- "Transport": 10,
183
- "Universal Health Coverage": 30,
184
- "Urban Development": 55,
185
- "Water": 84,
186
- "Water Resources Management": 59,
187
- "Water Supply": 25,
188
- "Water in Agriculture": 50
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
 
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "Indigenous Peoples",
12
+ "1": "Investment Climate",
13
+ "2": "Marine Plastic Pollution",
14
+ "3": "Poverty",
15
+ "4": "Water",
16
+ "5": "Measuring Poverty",
17
+ "6": "Energy",
18
+ "7": "Sustainable Communities",
19
+ "8": "Disability Inclusion",
20
+ "9": "Macroeconomics",
21
+ "10": "Early Childhood Development",
22
+ "11": "Mining Investment and Governance Review",
23
+ "12": "Infrastructure",
24
+ "13": "Community-Driven Development",
25
+ "14": "Land",
26
+ "15": "Innovation and Entrepreneurship",
27
+ "16": "Competitiveness",
28
+ "17": "Biodiversity",
29
+ "18": "Debt",
30
+ "19": "Higher Education",
31
+ "20": "Livestock and Sustainability",
32
+ "21": "Universal Health Coverage",
33
+ "22": "Nutrition",
34
+ "23": "Education and Technology",
35
+ "24": "Small and Medium Enterprises Finance",
36
+ "25": "Climate-Smart Agriculture",
37
+ "26": "Food Security Update",
38
+ "27": "Inclusive Cities",
39
+ "28": "Jobs & Development",
40
+ "29": "Migration",
41
+ "30": "Infectious diseases and Vaccines",
42
+ "31": "Social Protection",
43
+ "32": "Skills Development",
44
+ "33": "Governance",
45
+ "34": "Pollution",
46
+ "35": "Inequality and Shared Prosperity",
47
+ "36": "Oceans, Fisheries, and Coastal Economies",
48
+ "37": "Global Value Chains",
49
+ "38": "Pensions",
50
+ "39": "Food System Jobs",
51
+ "40": "Debt Relief",
52
+ "41": "Sexual Orientation and Gender Identity",
53
+ "42": "Education",
54
+ "43": "Regional Integration",
55
+ "44": "Jobs and Development",
56
+ "45": "Competition Policy",
57
+ "46": "Fragility, Conflict, and Violence",
58
+ "47": "Urban Development",
59
+ "48": "Pandemic Preparedness and COVID-19",
60
+ "49": "Safety Nets and Cash Transfers",
61
+ "50": "Financial Sector",
62
+ "51": "Climate Change",
63
+ "52": "Transport",
64
+ "53": "COVID-19 Hub",
65
+ "54": "Financial Stability",
66
+ "55": "Gender",
67
+ "56": "Natural Capital",
68
+ "57": "Water in Agriculture",
69
+ "58": "Taxes and Government Revenue",
70
+ "59": "Labor Markets",
71
+ "60": "Trade Facilitation and Logistics",
72
+ "61": "Long-Term Finance",
73
+ "62": "Teachers",
74
+ "63": "Sanitation",
75
+ "64": "Forests and Landscapes",
76
+ "65": "Financial Integrity",
77
+ "66": "Payment Systems",
78
+ "67": "Social Inclusion",
79
+ "68": "Health",
80
+ "69": "Girls' Education",
81
+ "70": "Credit Infrastructure",
82
+ "71": "Social Sustainability and Inclusion",
83
+ "72": "Trade",
84
+ "73": "Financial Inclusion",
85
+ "74": "Global Financing Facility for Women, Children and Adolescents",
86
+ "75": "Sustainable Infrastructure Finance",
87
+ "76": "Extractive Industries",
88
+ "77": "Procurement for Development",
89
+ "78": "Agriculture and Food",
90
+ "79": "Water Supply",
91
+ "80": "Disaster Risk Management",
92
+ "81": "One Health",
93
+ "82": "Environment",
94
+ "83": "Digital Development",
95
+ "84": "Agribusiness and Value Chains",
96
+ "85": "Gas Flaring Reduction",
97
+ "86": "Water Resources Management"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
+ "Agribusiness and Value Chains": 84,
103
+ "Agriculture and Food": 78,
104
+ "Biodiversity": 17,
105
+ "COVID-19 Hub": 53,
106
+ "Climate Change": 51,
107
+ "Climate-Smart Agriculture": 25,
108
+ "Community-Driven Development": 13,
109
+ "Competition Policy": 45,
110
+ "Competitiveness": 16,
111
+ "Credit Infrastructure": 70,
112
+ "Debt": 18,
113
+ "Debt Relief": 40,
114
+ "Digital Development": 83,
115
+ "Disability Inclusion": 8,
116
+ "Disaster Risk Management": 80,
117
+ "Early Childhood Development": 10,
118
+ "Education": 42,
119
+ "Education and Technology": 23,
120
+ "Energy": 6,
121
+ "Environment": 82,
122
+ "Extractive Industries": 76,
123
+ "Financial Inclusion": 73,
124
+ "Financial Integrity": 65,
125
+ "Financial Sector": 50,
126
+ "Financial Stability": 54,
127
+ "Food Security Update": 26,
128
+ "Food System Jobs": 39,
129
+ "Forests and Landscapes": 64,
130
+ "Fragility, Conflict, and Violence": 46,
131
+ "Gas Flaring Reduction": 85,
132
+ "Gender": 55,
133
+ "Girls' Education": 69,
134
+ "Global Financing Facility for Women, Children and Adolescents": 74,
135
+ "Global Value Chains": 37,
136
+ "Governance": 33,
137
+ "Health": 68,
138
+ "Higher Education": 19,
139
+ "Inclusive Cities": 27,
140
+ "Indigenous Peoples": 0,
141
+ "Inequality and Shared Prosperity": 35,
142
+ "Infectious diseases and Vaccines": 30,
143
+ "Infrastructure": 12,
144
+ "Innovation and Entrepreneurship": 15,
145
+ "Investment Climate": 1,
146
+ "Jobs & Development": 28,
147
+ "Jobs and Development": 44,
148
+ "Labor Markets": 59,
149
+ "Land": 14,
150
+ "Livestock and Sustainability": 20,
151
+ "Long-Term Finance": 61,
152
+ "Macroeconomics": 9,
153
+ "Marine Plastic Pollution": 2,
154
+ "Measuring Poverty": 5,
155
+ "Migration": 29,
156
+ "Mining Investment and Governance Review": 11,
157
+ "Natural Capital": 56,
158
+ "Nutrition": 22,
159
+ "Oceans, Fisheries, and Coastal Economies": 36,
160
+ "One Health": 81,
161
+ "Pandemic Preparedness and COVID-19": 48,
162
+ "Payment Systems": 66,
163
+ "Pensions": 38,
164
+ "Pollution": 34,
165
+ "Poverty": 3,
166
+ "Procurement for Development": 77,
167
+ "Regional Integration": 43,
168
+ "Safety Nets and Cash Transfers": 49,
169
+ "Sanitation": 63,
170
+ "Sexual Orientation and Gender Identity": 41,
171
+ "Skills Development": 32,
172
+ "Small and Medium Enterprises Finance": 24,
173
+ "Social Inclusion": 67,
174
+ "Social Protection": 31,
175
+ "Social Sustainability and Inclusion": 71,
176
+ "Sustainable Communities": 7,
177
+ "Sustainable Infrastructure Finance": 75,
178
+ "Taxes and Government Revenue": 58,
179
+ "Teachers": 62,
180
+ "Trade": 72,
181
+ "Trade Facilitation and Logistics": 60,
182
+ "Transport": 52,
183
+ "Universal Health Coverage": 21,
184
+ "Urban Development": 47,
185
+ "Water": 4,
186
+ "Water Resources Management": 86,
187
+ "Water Supply": 79,
188
+ "Water in Agriculture": 57
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a339bd470013c8491ebf0d72fc3ee02df1ef2e6cd7b107d451f5ab1060a88f2e
3
  size 567860028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ce19c672be0d8e93a7fa34f22da21019af73a72b352a65bdfa8a3d06b1c6a9e
3
  size 567860028
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35a3999d734c354e3c99f99c8060ac2985872124b38cf5e0c86f51c610e3425e
3
  size 1135783354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f1fa73ea7a5820ec872caa2a1dcbe4434a262fc31339d2a61a280dcaae0398
3
  size 1135783354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:886034beaadedc8c14b848859c00bafbf6ac1fe7936754759a2ece8f7c66229f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34649568145f465443beb487f2295ab3dad9e5f49f758646dd823029413e18fe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fda17ccbaef24102552407afe26348887346d6be2aa1770720ba332a543beee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e136870ce715682b288c29fb4d05a0aec61f1a74b2cc393d7fb9e66e4965261
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,358 +1,55 @@
1
  {
2
- "best_metric": 0.03880663216114044,
3
- "best_model_checkpoint": "doc-topic-model_eval-01_train-00/checkpoint-10000",
4
- "epoch": 102.36220472440945,
5
  "eval_steps": 1000,
6
- "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 3.937007874015748,
13
- "grad_norm": 0.24089162051677704,
14
- "learning_rate": 4.876968503937009e-06,
15
- "loss": 0.3064,
16
  "step": 500
17
  },
18
  {
19
- "epoch": 7.874015748031496,
20
- "grad_norm": 0.10720663517713547,
21
- "learning_rate": 4.753937007874016e-06,
22
- "loss": 0.099,
23
  "step": 1000
24
  },
25
  {
26
- "epoch": 7.874015748031496,
27
  "eval_accuracy": 0.981383845685049,
28
  "eval_f1": 0.0,
29
- "eval_loss": 0.09238693863153458,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
- "eval_runtime": 11.7879,
33
- "eval_samples_per_second": 687.993,
34
- "eval_steps_per_second": 2.715,
35
  "step": 1000
36
- },
37
- {
38
- "epoch": 11.811023622047244,
39
- "grad_norm": 0.10422395914793015,
40
- "learning_rate": 4.630905511811024e-06,
41
- "loss": 0.0907,
42
- "step": 1500
43
- },
44
- {
45
- "epoch": 15.748031496062993,
46
- "grad_norm": 0.09992287307977676,
47
- "learning_rate": 4.507874015748032e-06,
48
- "loss": 0.0826,
49
- "step": 2000
50
- },
51
- {
52
- "epoch": 15.748031496062993,
53
- "eval_accuracy": 0.981383845685049,
54
- "eval_f1": 0.0,
55
- "eval_loss": 0.0752759575843811,
56
- "eval_precision": 0.0,
57
- "eval_recall": 0.0,
58
- "eval_runtime": 11.8287,
59
- "eval_samples_per_second": 685.62,
60
- "eval_steps_per_second": 2.705,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 19.68503937007874,
65
- "grad_norm": 0.10254054516553879,
66
- "learning_rate": 4.38484251968504e-06,
67
- "loss": 0.0738,
68
- "step": 2500
69
- },
70
- {
71
- "epoch": 23.62204724409449,
72
- "grad_norm": 0.10703600943088531,
73
- "learning_rate": 4.261811023622048e-06,
74
- "loss": 0.0656,
75
- "step": 3000
76
- },
77
- {
78
- "epoch": 23.62204724409449,
79
- "eval_accuracy": 0.9818331278257295,
80
- "eval_f1": 0.0531836312601566,
81
- "eval_loss": 0.05948049575090408,
82
- "eval_precision": 0.8933002481389578,
83
- "eval_recall": 0.027407689379520365,
84
- "eval_runtime": 14.2215,
85
- "eval_samples_per_second": 570.263,
86
- "eval_steps_per_second": 2.25,
87
- "step": 3000
88
- },
89
- {
90
- "epoch": 27.559055118110237,
91
- "grad_norm": 0.10907676815986633,
92
- "learning_rate": 4.138779527559055e-06,
93
- "loss": 0.0585,
94
- "step": 3500
95
- },
96
- {
97
- "epoch": 31.496062992125985,
98
- "grad_norm": 0.10444527864456177,
99
- "learning_rate": 4.015748031496064e-06,
100
- "loss": 0.0529,
101
- "step": 4000
102
- },
103
- {
104
- "epoch": 31.496062992125985,
105
- "eval_accuracy": 0.9846790538146463,
106
- "eval_f1": 0.3410143867349427,
107
- "eval_loss": 0.049754805862903595,
108
- "eval_precision": 0.8556133374120526,
109
- "eval_recall": 0.2129425199847735,
110
- "eval_runtime": 14.1634,
111
- "eval_samples_per_second": 572.604,
112
- "eval_steps_per_second": 2.259,
113
- "step": 4000
114
- },
115
- {
116
- "epoch": 35.43307086614173,
117
- "grad_norm": 0.11932671815156937,
118
- "learning_rate": 3.8927165354330705e-06,
119
- "loss": 0.0482,
120
- "step": 4500
121
- },
122
- {
123
- "epoch": 39.37007874015748,
124
- "grad_norm": 0.11693865060806274,
125
- "learning_rate": 3.769685039370079e-06,
126
- "loss": 0.0446,
127
- "step": 5000
128
- },
129
- {
130
- "epoch": 39.37007874015748,
131
- "eval_accuracy": 0.9862933514746942,
132
- "eval_f1": 0.47693222997457946,
133
- "eval_loss": 0.04449684917926788,
134
- "eval_precision": 0.8234964512514008,
135
- "eval_recall": 0.3356680624286258,
136
- "eval_runtime": 14.0359,
137
- "eval_samples_per_second": 577.806,
138
- "eval_steps_per_second": 2.28,
139
- "step": 5000
140
- },
141
- {
142
- "epoch": 43.30708661417323,
143
- "grad_norm": 0.12081195414066315,
144
- "learning_rate": 3.646653543307087e-06,
145
- "loss": 0.0416,
146
- "step": 5500
147
- },
148
- {
149
- "epoch": 47.24409448818898,
150
- "grad_norm": 0.12802477180957794,
151
- "learning_rate": 3.5236220472440946e-06,
152
- "loss": 0.039,
153
- "step": 6000
154
- },
155
- {
156
- "epoch": 47.24409448818898,
157
- "eval_accuracy": 0.9868857802911122,
158
- "eval_f1": 0.5325587269512503,
159
- "eval_loss": 0.041690852493047714,
160
- "eval_precision": 0.7914414414414415,
161
- "eval_recall": 0.40129425199847735,
162
- "eval_runtime": 14.1088,
163
- "eval_samples_per_second": 574.819,
164
- "eval_steps_per_second": 2.268,
165
- "step": 6000
166
- },
167
- {
168
- "epoch": 51.181102362204726,
169
- "grad_norm": 0.12447524815797806,
170
- "learning_rate": 3.4005905511811027e-06,
171
- "loss": 0.0368,
172
- "step": 6500
173
- },
174
- {
175
- "epoch": 55.118110236220474,
176
- "grad_norm": 0.14242896437644958,
177
- "learning_rate": 3.2775590551181103e-06,
178
- "loss": 0.0348,
179
- "step": 7000
180
- },
181
- {
182
- "epoch": 55.118110236220474,
183
- "eval_accuracy": 0.987220261632439,
184
- "eval_f1": 0.5665112254218547,
185
- "eval_loss": 0.04018256813287735,
186
- "eval_precision": 0.7685885729193843,
187
- "eval_recall": 0.44857251617814997,
188
- "eval_runtime": 14.0857,
189
- "eval_samples_per_second": 575.762,
190
- "eval_steps_per_second": 2.272,
191
- "step": 7000
192
- },
193
- {
194
- "epoch": 59.05511811023622,
195
- "grad_norm": 0.12024829536676407,
196
- "learning_rate": 3.1545275590551183e-06,
197
- "loss": 0.0332,
198
- "step": 7500
199
- },
200
- {
201
- "epoch": 62.99212598425197,
202
- "grad_norm": 0.12477916479110718,
203
- "learning_rate": 3.0314960629921263e-06,
204
- "loss": 0.0316,
205
- "step": 8000
206
- },
207
- {
208
- "epoch": 62.99212598425197,
209
- "eval_accuracy": 0.9873237240812393,
210
- "eval_f1": 0.5808023997000374,
211
- "eval_loss": 0.03934308886528015,
212
- "eval_precision": 0.7555176198024631,
213
- "eval_recall": 0.47171678720974497,
214
- "eval_runtime": 14.1551,
215
- "eval_samples_per_second": 572.936,
216
- "eval_steps_per_second": 2.261,
217
- "step": 8000
218
- },
219
- {
220
- "epoch": 66.92913385826772,
221
- "grad_norm": 0.13788625597953796,
222
- "learning_rate": 2.908464566929134e-06,
223
- "loss": 0.0302,
224
- "step": 8500
225
- },
226
- {
227
- "epoch": 70.86614173228347,
228
- "grad_norm": 0.12086493521928787,
229
- "learning_rate": 2.785433070866142e-06,
230
- "loss": 0.029,
231
- "step": 9000
232
- },
233
- {
234
- "epoch": 70.86614173228347,
235
- "eval_accuracy": 0.9874427767620505,
236
- "eval_f1": 0.5916297935103245,
237
- "eval_loss": 0.038920916616916656,
238
- "eval_precision": 0.7496787758439435,
239
- "eval_recall": 0.4886181956604492,
240
- "eval_runtime": 14.1532,
241
- "eval_samples_per_second": 573.016,
242
- "eval_steps_per_second": 2.261,
243
- "step": 9000
244
- },
245
- {
246
- "epoch": 74.80314960629921,
247
- "grad_norm": 0.1340780407190323,
248
- "learning_rate": 2.66240157480315e-06,
249
- "loss": 0.0279,
250
- "step": 9500
251
- },
252
- {
253
- "epoch": 78.74015748031496,
254
- "grad_norm": 0.15667644143104553,
255
- "learning_rate": 2.5393700787401576e-06,
256
- "loss": 0.0269,
257
- "step": 10000
258
- },
259
- {
260
- "epoch": 78.74015748031496,
261
- "eval_accuracy": 0.9875037203962753,
262
- "eval_f1": 0.598039662639617,
263
- "eval_loss": 0.03880663216114044,
264
- "eval_precision": 0.7453409090909091,
265
- "eval_recall": 0.49935287400076134,
266
- "eval_runtime": 14.1798,
267
- "eval_samples_per_second": 571.941,
268
- "eval_steps_per_second": 2.257,
269
- "step": 10000
270
- },
271
- {
272
- "epoch": 82.67716535433071,
273
- "grad_norm": 0.1377759575843811,
274
- "learning_rate": 2.4163385826771656e-06,
275
- "loss": 0.0258,
276
- "step": 10500
277
- },
278
- {
279
- "epoch": 86.61417322834646,
280
- "grad_norm": 0.1264483779668808,
281
- "learning_rate": 2.2933070866141736e-06,
282
- "loss": 0.0249,
283
- "step": 11000
284
- },
285
- {
286
- "epoch": 86.61417322834646,
287
- "eval_accuracy": 0.9874484459373273,
288
- "eval_f1": 0.6034745231485628,
289
- "eval_loss": 0.0388496108353138,
290
- "eval_precision": 0.7325796282204587,
291
- "eval_recall": 0.5130567186905215,
292
- "eval_runtime": 14.1595,
293
- "eval_samples_per_second": 572.761,
294
- "eval_steps_per_second": 2.26,
295
- "step": 11000
296
- },
297
- {
298
- "epoch": 90.55118110236221,
299
- "grad_norm": 0.1424516886472702,
300
- "learning_rate": 2.1702755905511812e-06,
301
- "loss": 0.0241,
302
- "step": 11500
303
- },
304
- {
305
- "epoch": 94.48818897637796,
306
- "grad_norm": 0.16246256232261658,
307
- "learning_rate": 2.0472440944881893e-06,
308
- "loss": 0.0234,
309
- "step": 12000
310
- },
311
- {
312
- "epoch": 94.48818897637796,
313
- "eval_accuracy": 0.9874186827671244,
314
- "eval_f1": 0.6064985150050978,
315
- "eval_loss": 0.038960497826337814,
316
- "eval_precision": 0.7259125636672326,
317
- "eval_recall": 0.5208222306813856,
318
- "eval_runtime": 14.1182,
319
- "eval_samples_per_second": 574.434,
320
- "eval_steps_per_second": 2.267,
321
- "step": 12000
322
- },
323
- {
324
- "epoch": 98.4251968503937,
325
- "grad_norm": 0.15036757290363312,
326
- "learning_rate": 1.924212598425197e-06,
327
- "loss": 0.0227,
328
- "step": 12500
329
- },
330
- {
331
- "epoch": 102.36220472440945,
332
- "grad_norm": 0.14854416251182556,
333
- "learning_rate": 1.801181102362205e-06,
334
- "loss": 0.022,
335
- "step": 13000
336
- },
337
- {
338
- "epoch": 102.36220472440945,
339
- "eval_accuracy": 0.987407344416571,
340
- "eval_f1": 0.6050584522380762,
341
- "eval_loss": 0.03925173729658127,
342
- "eval_precision": 0.7269814142277291,
343
- "eval_recall": 0.5181575942139323,
344
- "eval_runtime": 14.1509,
345
- "eval_samples_per_second": 573.11,
346
- "eval_steps_per_second": 2.261,
347
- "step": 13000
348
  }
349
  ],
350
  "logging_steps": 500,
351
- "max_steps": 20320,
352
  "num_input_tokens_seen": 0,
353
- "num_train_epochs": 160,
354
  "save_steps": 1000,
355
  "stateful_callbacks": {
 
 
 
 
 
 
 
 
 
356
  "TrainerControl": {
357
  "args": {
358
  "should_epoch_stop": false,
@@ -364,8 +61,8 @@
364
  "attributes": {}
365
  }
366
  },
367
- "total_flos": 5841706246352316.0,
368
- "train_batch_size": 64,
369
  "trial_name": null,
370
  "trial_params": null
371
  }
 
1
  {
2
+ "best_metric": 0.0910971537232399,
3
+ "best_model_checkpoint": "doc-topic-model_eval-01_train-00/checkpoint-1000",
4
+ "epoch": 0.4930966469428008,
5
  "eval_steps": 1000,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.2465483234714004,
13
+ "grad_norm": 0.32839319109916687,
14
+ "learning_rate": 1.9950690335305722e-05,
15
+ "loss": 0.1668,
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 0.4930966469428008,
20
+ "grad_norm": 0.4088590443134308,
21
+ "learning_rate": 1.9901380670611442e-05,
22
+ "loss": 0.0929,
23
  "step": 1000
24
  },
25
  {
26
+ "epoch": 0.4930966469428008,
27
  "eval_accuracy": 0.981383845685049,
28
  "eval_f1": 0.0,
29
+ "eval_loss": 0.0910971537232399,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
+ "eval_runtime": 12.1172,
33
+ "eval_samples_per_second": 669.294,
34
+ "eval_steps_per_second": 2.641,
35
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
  "logging_steps": 500,
39
+ "max_steps": 202800,
40
  "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 100,
42
  "save_steps": 1000,
43
  "stateful_callbacks": {
44
+ "EarlyStoppingCallback": {
45
+ "args": {
46
+ "early_stopping_patience": 5,
47
+ "early_stopping_threshold": 0.0
48
+ },
49
+ "attributes": {
50
+ "early_stopping_patience_counter": 0
51
+ }
52
+ },
53
  "TrainerControl": {
54
  "args": {
55
  "should_epoch_stop": false,
 
61
  "attributes": {}
62
  }
63
  },
64
+ "total_flos": 22568907250224.0,
65
+ "train_batch_size": 4,
66
  "trial_name": null,
67
  "trial_params": null
68
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f40a68245ac62de16e2d352af4044f4b0e3ae9e443007177476e04d8ebd03827
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:793ce0ea9b102975f2c13ebb162cd3b8fa0b532090e49ab65ff0a4403515a81b
3
  size 5240