Bukareszt committed on
Commit
9edf4c1
·
verified ·
1 Parent(s): d62a59f

Initial push

Browse files
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [PKOBP/polish-roberta-8k](https://huggingface.co/PKOBP/polish-roberta-8k) on an unspecified dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.1007
25
- - Accuracy: 0.7838
26
- - Precision: 0.7630
27
- - Recall: 0.7838
28
- - F1: 0.7601
29
 
30
  ## Model description
31
 
@@ -60,14 +60,14 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
63
- | No log | 1.0 | 40 | 2.7021 | 0.3856 | 0.2986 | 0.3856 | 0.2870 |
64
- | 3.0953 | 2.0 | 80 | 1.9267 | 0.5958 | 0.5010 | 0.5958 | 0.5128 |
65
- | 2.1153 | 3.0 | 120 | 1.5299 | 0.6978 | 0.6806 | 0.6978 | 0.6399 |
66
- | 1.5767 | 4.0 | 160 | 1.3317 | 0.7376 | 0.7340 | 0.7376 | 0.7022 |
67
- | 1.2985 | 5.0 | 200 | 1.2154 | 0.7674 | 0.7460 | 0.7674 | 0.7407 |
68
- | 1.2985 | 6.0 | 240 | 1.1614 | 0.7749 | 0.7545 | 0.7749 | 0.7515 |
69
- | 1.1262 | 7.0 | 280 | 1.1227 | 0.7799 | 0.7575 | 0.7799 | 0.7605 |
70
- | 1.0373 | 8.0 | 320 | 1.1079 | 0.7786 | 0.7540 | 0.7786 | 0.7588 |
71
 
72
 
73
  ### Framework versions
 
21
 
22
  This model is a fine-tuned version of [PKOBP/polish-roberta-8k](https://huggingface.co/PKOBP/polish-roberta-8k) on an unspecified dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 1.5204
25
+ - Accuracy: 0.6913
26
+ - Precision: 0.6262
27
+ - Recall: 0.6913
28
+ - F1: 0.6466
29
 
30
  ## Model description
31
 
 
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
63
+ | No log | 1.0 | 40 | 3.0781 | 0.3321 | 0.2327 | 0.3321 | 0.2216 |
64
+ | 3.4361 | 2.0 | 80 | 2.3150 | 0.5547 | 0.4310 | 0.5547 | 0.4554 |
65
+ | 2.5248 | 3.0 | 120 | 1.8797 | 0.6505 | 0.5657 | 0.6505 | 0.5747 |
66
+ | 1.9758 | 4.0 | 160 | 1.6472 | 0.6978 | 0.6032 | 0.6978 | 0.6343 |
67
+ | 1.6695 | 5.0 | 200 | 1.5167 | 0.7090 | 0.6439 | 0.7090 | 0.6525 |
68
+ | 1.6695 | 6.0 | 240 | 1.4413 | 0.7214 | 0.6519 | 0.7214 | 0.6721 |
69
+ | 1.49 | 7.0 | 280 | 1.4023 | 0.7276 | 0.6570 | 0.7276 | 0.6800 |
70
+ | 1.3922 | 8.0 | 320 | 1.3896 | 0.7264 | 0.6564 | 0.7264 | 0.6793 |
71
 
72
 
73
  ### Framework versions
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 8.0,
3
- "eval_accuracy": 0.7838150289017342,
4
- "eval_f1": 0.760134834840537,
5
- "eval_loss": 1.1006990671157837,
6
- "eval_precision": 0.7630015333272686,
7
- "eval_recall": 0.7838150289017342,
8
- "eval_runtime": 3.2334,
9
- "eval_samples_per_second": 267.523,
10
- "eval_steps_per_second": 5.876,
11
- "total_flos": 2.6699656498043904e+16,
12
- "train_loss": 1.6651537001132966,
13
- "train_runtime": 443.5681,
14
- "train_samples_per_second": 69.257,
15
- "train_steps_per_second": 0.721
16
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "eval_accuracy": 0.6913294797687861,
4
+ "eval_f1": 0.6466102267210698,
5
+ "eval_loss": 1.5204323530197144,
6
+ "eval_precision": 0.6262094124143767,
7
+ "eval_recall": 0.6913294797687861,
8
+ "eval_runtime": 3.1459,
9
+ "eval_samples_per_second": 274.962,
10
+ "eval_steps_per_second": 6.04,
11
+ "total_flos": 2.653030012194216e+16,
12
+ "train_loss": 2.0375428795814514,
13
+ "train_runtime": 441.0529,
14
+ "train_samples_per_second": 69.652,
15
+ "train_steps_per_second": 0.726
16
  }
classification_report.txt CHANGED
@@ -154,3 +154,57 @@ WOD_OGR_PRZY 0.9500 0.9048 0.9268 21
154
  accuracy 0.7838 865
155
  macro avg 0.7710 0.7350 0.7324 865
156
  weighted avg 0.7630 0.7838 0.7601 865
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  accuracy 0.7838 865
155
  macro avg 0.7710 0.7350 0.7324 865
156
  weighted avg 0.7630 0.7838 0.7601 865
157
+
158
+ ================================================================================
159
+
160
+ DETAILED CLASSIFICATION REPORT (Top-1)
161
+ ================================================================================
162
+ precision recall f1-score support
163
+
164
+ BINFO 0.9333 0.8235 0.8750 17
165
+ DANE_ARCH 0.8333 0.3846 0.5263 13
166
+ DIERZ_ST_HYD 0.8378 0.9118 0.8732 34
167
+ INFO_DW 0.9000 0.7826 0.8372 23
168
+ INSP_TV 1.0000 1.0000 1.0000 13
169
+ INTERW_AW_K 0.6447 0.8909 0.7481 55
170
+ INTERW_AW_W 0.6389 0.8070 0.7132 57
171
+ INTERW_ODTW 0.5854 0.8889 0.7059 27
172
+ INTERW_ZAP 0.7692 0.6667 0.7143 15
173
+ NEGOC_DESZCZ 0.8611 0.9394 0.8986 33
174
+ ODWOD_KS 1.0000 0.8333 0.9091 6
175
+ OP_PRZY_WK 0.0000 0.0000 0.0000 16
176
+ OP_SIEC_WK 0.6944 0.5814 0.6329 43
177
+ OP_UM 0.8462 1.0000 0.9167 22
178
+ POZYTYW 0.0000 0.0000 0.0000 17
179
+ POZ_SPR_WIND 0.8235 0.9180 0.8682 61
180
+ PRZE_ODB_PK 0.4286 0.9231 0.5854 26
181
+ PRZE_ODB_PW 0.0000 0.0000 0.0000 12
182
+ PRZE_ODB_PWK 0.0000 0.0000 0.0000 13
183
+ PYT_INFR 0.0000 0.0000 0.0000 13
184
+ PYT_INW 0.5000 0.1667 0.2500 6
185
+ PYT_UMOWA 0.6667 0.5000 0.5714 12
186
+ REKLAMACJA_3 0.7333 1.0000 0.8462 11
187
+ REKLAMACJA_4 0.6000 0.9512 0.7358 41
188
+ REKLAMACJA_6 0.7143 0.4167 0.5263 12
189
+ REKLAMACJA_7 1.0000 0.6667 0.8000 6
190
+ REKLAMACJA_8 0.0000 0.0000 0.0000 8
191
+ REKLAMAC_11 0.0000 0.0000 0.0000 11
192
+ ROW_EKSP 0.7500 0.6923 0.7200 13
193
+ SK_I 0.5135 0.6552 0.5758 29
194
+ UDOST_WN 0.8182 0.7500 0.7826 12
195
+ UM_PARTYCY 0.8077 1.0000 0.8936 21
196
+ UZ_SIEC_WK 1.0000 0.3077 0.4706 13
197
+ WAR_WK 0.4286 0.3000 0.3529 10
198
+ WAR_WKKD 0.4667 0.5833 0.5185 12
199
+ WOD_OGR_PRZY 0.9048 0.9048 0.9048 21
200
+ WPIN_SIEC 0.8462 1.0000 0.9167 11
201
+ ZASW_KONC 0.9091 0.8333 0.8696 12
202
+ ZG_ODCZ 0.8889 0.9091 0.8989 44
203
+ ZM_DANYC_ODB 0.5625 0.7500 0.6429 12
204
+ ZM_DAN_ODB 0.0000 0.0000 0.0000 6
205
+ ZM_ZARZADCY 0.9091 0.9091 0.9091 11
206
+ ZW_NADP 0.9286 0.8667 0.8966 15
207
+
208
+ accuracy 0.7202 865
209
+ macro avg 0.6220 0.6166 0.6020 865
210
+ weighted avg 0.6635 0.7202 0.6765 865
config.json CHANGED
@@ -25,7 +25,7 @@
25
  "1": "DANE_ARCH",
26
  "2": "DIERZ_ST_HYD",
27
  "3": "INFO_DW",
28
- "4": "INSP",
29
  "5": "INTERW_AW_K",
30
  "6": "INTERW_AW_W",
31
  "7": "INTERW_ODTW",
@@ -37,22 +37,33 @@
37
  "13": "OP_UM",
38
  "14": "POZYTYW",
39
  "15": "POZ_SPR_WIND",
40
- "16": "PRZE",
41
- "17": "PYT",
42
- "18": "REKLAMACJA",
43
- "19": "ROW_EKSP",
44
- "20": "SK",
45
- "21": "UDOST_WN",
46
- "22": "UM_PARTYCY",
47
- "23": "UZ_SIEC_WK",
48
- "24": "WAR_WK",
49
- "25": "WAR_WKKD",
50
- "26": "WOD_OGR_PRZY",
51
- "27": "WPIN_SIEC",
52
- "28": "ZASW_KONC",
53
- "29": "ZG_ODCZ",
54
- "30": "ZM",
55
- "31": "ZW_NADP"
 
 
 
 
 
 
 
 
 
 
 
56
  },
57
  "initializer_range": 0.02,
58
  "intermediate_size": 4096,
@@ -61,7 +72,7 @@
61
  "DANE_ARCH": 1,
62
  "DIERZ_ST_HYD": 2,
63
  "INFO_DW": 3,
64
- "INSP": 4,
65
  "INTERW_AW_K": 5,
66
  "INTERW_AW_W": 6,
67
  "INTERW_ODTW": 7,
@@ -73,22 +84,33 @@
73
  "OP_UM": 13,
74
  "POZYTYW": 14,
75
  "POZ_SPR_WIND": 15,
76
- "PRZE": 16,
77
- "PYT": 17,
78
- "REKLAMACJA": 18,
79
- "ROW_EKSP": 19,
80
- "SK": 20,
81
- "UDOST_WN": 21,
82
- "UM_PARTYCY": 22,
83
- "UZ_SIEC_WK": 23,
84
- "WAR_WK": 24,
85
- "WAR_WKKD": 25,
86
- "WOD_OGR_PRZY": 26,
87
- "WPIN_SIEC": 27,
88
- "ZASW_KONC": 28,
89
- "ZG_ODCZ": 29,
90
- "ZM": 30,
91
- "ZW_NADP": 31
 
 
 
 
 
 
 
 
 
 
 
92
  },
93
  "layer_norm_eps": 1e-05,
94
  "max_position_embeddings": 8194,
 
25
  "1": "DANE_ARCH",
26
  "2": "DIERZ_ST_HYD",
27
  "3": "INFO_DW",
28
+ "4": "INSP_TV",
29
  "5": "INTERW_AW_K",
30
  "6": "INTERW_AW_W",
31
  "7": "INTERW_ODTW",
 
37
  "13": "OP_UM",
38
  "14": "POZYTYW",
39
  "15": "POZ_SPR_WIND",
40
+ "16": "PRZE_ODB_PK",
41
+ "17": "PRZE_ODB_PW",
42
+ "18": "PRZE_ODB_PWK",
43
+ "19": "PYT_INFR",
44
+ "20": "PYT_INW",
45
+ "21": "PYT_UMOWA",
46
+ "22": "REKLAMACJA_3",
47
+ "23": "REKLAMACJA_4",
48
+ "24": "REKLAMACJA_6",
49
+ "25": "REKLAMACJA_7",
50
+ "26": "REKLAMACJA_8",
51
+ "27": "REKLAMAC_11",
52
+ "28": "ROW_EKSP",
53
+ "29": "SK_I",
54
+ "30": "UDOST_WN",
55
+ "31": "UM_PARTYCY",
56
+ "32": "UZ_SIEC_WK",
57
+ "33": "WAR_WK",
58
+ "34": "WAR_WKKD",
59
+ "35": "WOD_OGR_PRZY",
60
+ "36": "WPIN_SIEC",
61
+ "37": "ZASW_KONC",
62
+ "38": "ZG_ODCZ",
63
+ "39": "ZM_DANYC_ODB",
64
+ "40": "ZM_DAN_ODB",
65
+ "41": "ZM_ZARZADCY",
66
+ "42": "ZW_NADP"
67
  },
68
  "initializer_range": 0.02,
69
  "intermediate_size": 4096,
 
72
  "DANE_ARCH": 1,
73
  "DIERZ_ST_HYD": 2,
74
  "INFO_DW": 3,
75
+ "INSP_TV": 4,
76
  "INTERW_AW_K": 5,
77
  "INTERW_AW_W": 6,
78
  "INTERW_ODTW": 7,
 
84
  "OP_UM": 13,
85
  "POZYTYW": 14,
86
  "POZ_SPR_WIND": 15,
87
+ "PRZE_ODB_PK": 16,
88
+ "PRZE_ODB_PW": 17,
89
+ "PRZE_ODB_PWK": 18,
90
+ "PYT_INFR": 19,
91
+ "PYT_INW": 20,
92
+ "PYT_UMOWA": 21,
93
+ "REKLAMACJA_3": 22,
94
+ "REKLAMACJA_4": 23,
95
+ "REKLAMACJA_6": 24,
96
+ "REKLAMACJA_7": 25,
97
+ "REKLAMACJA_8": 26,
98
+ "REKLAMAC_11": 27,
99
+ "ROW_EKSP": 28,
100
+ "SK_I": 29,
101
+ "UDOST_WN": 30,
102
+ "UM_PARTYCY": 31,
103
+ "UZ_SIEC_WK": 32,
104
+ "WAR_WK": 33,
105
+ "WAR_WKKD": 34,
106
+ "WOD_OGR_PRZY": 35,
107
+ "WPIN_SIEC": 36,
108
+ "ZASW_KONC": 37,
109
+ "ZG_ODCZ": 38,
110
+ "ZM_DANYC_ODB": 39,
111
+ "ZM_DAN_ODB": 40,
112
+ "ZM_ZARZADCY": 41,
113
+ "ZW_NADP": 42
114
  },
115
  "layer_norm_eps": 1e-05,
116
  "max_position_embeddings": 8194,
label_info.json CHANGED
@@ -4,7 +4,7 @@
4
  "DANE_ARCH": 1,
5
  "DIERZ_ST_HYD": 2,
6
  "INFO_DW": 3,
7
- "INSP": 4,
8
  "INTERW_AW_K": 5,
9
  "INTERW_AW_W": 6,
10
  "INTERW_ODTW": 7,
@@ -16,29 +16,40 @@
16
  "OP_UM": 13,
17
  "POZYTYW": 14,
18
  "POZ_SPR_WIND": 15,
19
- "PRZE": 16,
20
- "PYT": 17,
21
- "REKLAMACJA": 18,
22
- "ROW_EKSP": 19,
23
- "SK": 20,
24
- "UDOST_WN": 21,
25
- "UM_PARTYCY": 22,
26
- "UZ_SIEC_WK": 23,
27
- "WAR_WK": 24,
28
- "WAR_WKKD": 25,
29
- "WOD_OGR_PRZY": 26,
30
- "WPIN_SIEC": 27,
31
- "ZASW_KONC": 28,
32
- "ZG_ODCZ": 29,
33
- "ZM": 30,
34
- "ZW_NADP": 31
 
 
 
 
 
 
 
 
 
 
 
35
  },
36
  "id2label": {
37
  "0": "BINFO",
38
  "1": "DANE_ARCH",
39
  "2": "DIERZ_ST_HYD",
40
  "3": "INFO_DW",
41
- "4": "INSP",
42
  "5": "INTERW_AW_K",
43
  "6": "INTERW_AW_W",
44
  "7": "INTERW_ODTW",
@@ -50,22 +61,33 @@
50
  "13": "OP_UM",
51
  "14": "POZYTYW",
52
  "15": "POZ_SPR_WIND",
53
- "16": "PRZE",
54
- "17": "PYT",
55
- "18": "REKLAMACJA",
56
- "19": "ROW_EKSP",
57
- "20": "SK",
58
- "21": "UDOST_WN",
59
- "22": "UM_PARTYCY",
60
- "23": "UZ_SIEC_WK",
61
- "24": "WAR_WK",
62
- "25": "WAR_WKKD",
63
- "26": "WOD_OGR_PRZY",
64
- "27": "WPIN_SIEC",
65
- "28": "ZASW_KONC",
66
- "29": "ZG_ODCZ",
67
- "30": "ZM",
68
- "31": "ZW_NADP"
 
 
 
 
 
 
 
 
 
 
 
69
  },
70
- "num_labels": 32
71
  }
 
4
  "DANE_ARCH": 1,
5
  "DIERZ_ST_HYD": 2,
6
  "INFO_DW": 3,
7
+ "INSP_TV": 4,
8
  "INTERW_AW_K": 5,
9
  "INTERW_AW_W": 6,
10
  "INTERW_ODTW": 7,
 
16
  "OP_UM": 13,
17
  "POZYTYW": 14,
18
  "POZ_SPR_WIND": 15,
19
+ "PRZE_ODB_PK": 16,
20
+ "PRZE_ODB_PW": 17,
21
+ "PRZE_ODB_PWK": 18,
22
+ "PYT_INFR": 19,
23
+ "PYT_INW": 20,
24
+ "PYT_UMOWA": 21,
25
+ "REKLAMACJA_3": 22,
26
+ "REKLAMACJA_4": 23,
27
+ "REKLAMACJA_6": 24,
28
+ "REKLAMACJA_7": 25,
29
+ "REKLAMACJA_8": 26,
30
+ "REKLAMAC_11": 27,
31
+ "ROW_EKSP": 28,
32
+ "SK_I": 29,
33
+ "UDOST_WN": 30,
34
+ "UM_PARTYCY": 31,
35
+ "UZ_SIEC_WK": 32,
36
+ "WAR_WK": 33,
37
+ "WAR_WKKD": 34,
38
+ "WOD_OGR_PRZY": 35,
39
+ "WPIN_SIEC": 36,
40
+ "ZASW_KONC": 37,
41
+ "ZG_ODCZ": 38,
42
+ "ZM_DANYC_ODB": 39,
43
+ "ZM_DAN_ODB": 40,
44
+ "ZM_ZARZADCY": 41,
45
+ "ZW_NADP": 42
46
  },
47
  "id2label": {
48
  "0": "BINFO",
49
  "1": "DANE_ARCH",
50
  "2": "DIERZ_ST_HYD",
51
  "3": "INFO_DW",
52
+ "4": "INSP_TV",
53
  "5": "INTERW_AW_K",
54
  "6": "INTERW_AW_W",
55
  "7": "INTERW_ODTW",
 
61
  "13": "OP_UM",
62
  "14": "POZYTYW",
63
  "15": "POZ_SPR_WIND",
64
+ "16": "PRZE_ODB_PK",
65
+ "17": "PRZE_ODB_PW",
66
+ "18": "PRZE_ODB_PWK",
67
+ "19": "PYT_INFR",
68
+ "20": "PYT_INW",
69
+ "21": "PYT_UMOWA",
70
+ "22": "REKLAMACJA_3",
71
+ "23": "REKLAMACJA_4",
72
+ "24": "REKLAMACJA_6",
73
+ "25": "REKLAMACJA_7",
74
+ "26": "REKLAMACJA_8",
75
+ "27": "REKLAMAC_11",
76
+ "28": "ROW_EKSP",
77
+ "29": "SK_I",
78
+ "30": "UDOST_WN",
79
+ "31": "UM_PARTYCY",
80
+ "32": "UZ_SIEC_WK",
81
+ "33": "WAR_WK",
82
+ "34": "WAR_WKKD",
83
+ "35": "WOD_OGR_PRZY",
84
+ "36": "WPIN_SIEC",
85
+ "37": "ZASW_KONC",
86
+ "38": "ZG_ODCZ",
87
+ "39": "ZM_DANYC_ODB",
88
+ "40": "ZM_DAN_ODB",
89
+ "41": "ZM_ZARZADCY",
90
+ "42": "ZW_NADP"
91
  },
92
+ "num_labels": 43
93
  }
logs/events.out.tfevents.1761002856.a5b7e37e7852.50115.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d61bc4d29a57055dc06cfa2c16602fcf5aa904b7342bb6d0800342f093b5a85
3
+ size 12960
logs/events.out.tfevents.1761003311.a5b7e37e7852.50115.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:788e4ab119932c5d8c12c07bba214fddcfe11e1589e4d63cb1a34e772d5023bc
3
+ size 560
logs/events.out.tfevents.1761003460.a5b7e37e7852.53851.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67583a39c65086f68f5520534385c5a52eff053ab9290ce775db2425178c2239
3
+ size 12960
logs/events.out.tfevents.1761003906.a5b7e37e7852.53851.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c6cc8da4799cd81bb067851484fa5f7c8a858874b0226860dcc7bd0b048ec35
3
+ size 560
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62db5e122f9f41163da858efd8df098e45b4837daa5e352df66f01195be5b08e
3
- size 1771740624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f09e03fa2200d298d28d0e9d00765b5fe9d56b6f10f6cf0be7bc20bef233b968
3
+ size 1771785724
test_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 8.0,
3
- "eval_accuracy": 0.7838150289017342,
4
- "eval_f1": 0.760134834840537,
5
- "eval_loss": 1.1006990671157837,
6
- "eval_precision": 0.7630015333272686,
7
- "eval_recall": 0.7838150289017342,
8
- "eval_runtime": 3.2334,
9
- "eval_samples_per_second": 267.523,
10
- "eval_steps_per_second": 5.876
11
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "eval_accuracy": 0.6913294797687861,
4
+ "eval_f1": 0.6466102267210698,
5
+ "eval_loss": 1.5204323530197144,
6
+ "eval_precision": 0.6262094124143767,
7
+ "eval_recall": 0.6913294797687861,
8
+ "eval_runtime": 3.1459,
9
+ "eval_samples_per_second": 274.962,
10
+ "eval_steps_per_second": 6.04
11
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 8.0,
3
- "total_flos": 2.6699656498043904e+16,
4
- "train_loss": 1.6651537001132966,
5
- "train_runtime": 443.5681,
6
- "train_samples_per_second": 69.257,
7
- "train_steps_per_second": 0.721
8
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "total_flos": 2.653030012194216e+16,
4
+ "train_loss": 2.0375428795814514,
5
+ "train_runtime": 441.0529,
6
+ "train_samples_per_second": 69.652,
7
+ "train_steps_per_second": 0.726
8
  }