Brookseeworld committed on
Commit 03c7ca3 · verified · 1 parent: 5b2f845

Upload folder using huggingface_hub

Files changed (16)
  1. opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json +21 -0
  2. opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json +235 -0
  3. opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors +3 -0
  4. opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json +1 -0
  5. opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json +21 -0
  6. opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json +235 -0
  7. opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors +3 -0
  8. opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json +1 -0
  9. opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json +21 -0
  10. opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json +235 -0
  11. opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors +3 -0
  12. opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json +1 -0
  13. opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json +21 -0
  14. opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json +235 -0
  15. opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors +3 -0
  16. opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json +1 -0
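Each of the four folders packages the same artifacts: evaluation metrics, a fine-tuned OPT-125m checkpoint, and a small usage config. A minimal sketch of pulling one folder down with huggingface_hub (the library named in the commit message); the repo id below is a placeholder, not confirmed by this page:

```python
from huggingface_hub import snapshot_download

# Placeholder repo id; substitute the actual "<user>/<repo>" this commit belongs to.
local_dir = snapshot_download(
    repo_id="Brookseeworld/opt-125m-length-predictors",
    allow_patterns=["opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/*"],
)
print(local_dir)
```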
opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "evaluate_metrics": {
+ "tau": 0.6466399600269567,
+ "exact_match_acc": 0.43575,
+ "off_by_1_acc": 0.6955,
+ "off_by_2_acc": 0.81825,
+ "bin_mse": 4.69025,
+ "length_mse": 31178.705,
+ "length_rmse": 176.5749274387505
+ },
+ "test_metrics": {
+ "epoch": 4,
+ "tau": 0.6437279565974302,
+ "exact_match_acc": 0.45075,
+ "off_by_1_acc": 0.7135,
+ "off_by_2_acc": 0.83075,
+ "bin_mse": 4.1635,
+ "length_mse": 27609.662,
+ "length_rmse": 166.16155391666268
+ }
+ }
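A quick check of how these fields relate, assuming the equal-width bucket size of 82 tokens given in the folder name; length_rmse is simply the square root of length_mse:

```python
import math

# Values copied from the evaluate_metrics block above.
bin_mse = 4.69025
length_mse = 31178.705
length_rmse = 176.5749274387505

# length_rmse is the square root of length_mse.
assert math.isclose(math.sqrt(length_mse), length_rmse, rel_tol=1e-9)

# With equal-width buckets of 82 tokens, an RMS bucket error of sqrt(bin_mse)
# corresponds to roughly sqrt(bin_mse) * 82 tokens.
print(math.sqrt(bin_mse) * 82)  # ~177.6, in line with length_rmse
```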
opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json ADDED
@@ -0,0 +1,235 @@
+ {
+ "_remove_final_layer_norm": false,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "architectures": [
+ "OPTForSequenceClassification"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 2,
+ "do_layer_norm_before": true,
+ "dropout": 0.1,
+ "enable_bias": true,
+ "eos_token_id": 2,
+ "ffn_dim": 3072,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3",
+ "4": "LABEL_4",
+ "5": "LABEL_5",
+ "6": "LABEL_6",
+ "7": "LABEL_7",
+ "8": "LABEL_8",
+ "9": "LABEL_9",
+ "10": "LABEL_10",
+ "11": "LABEL_11",
+ "12": "LABEL_12",
+ "13": "LABEL_13",
+ "14": "LABEL_14",
+ "15": "LABEL_15",
+ "16": "LABEL_16",
+ "17": "LABEL_17",
+ "18": "LABEL_18",
+ "19": "LABEL_19",
+ "20": "LABEL_20",
+ "21": "LABEL_21",
+ "22": "LABEL_22",
+ "23": "LABEL_23",
+ "24": "LABEL_24",
+ "25": "LABEL_25",
+ "26": "LABEL_26",
+ "27": "LABEL_27",
+ "28": "LABEL_28",
+ "29": "LABEL_29",
+ "30": "LABEL_30",
+ "31": "LABEL_31",
+ "32": "LABEL_32",
+ "33": "LABEL_33",
+ "34": "LABEL_34",
+ "35": "LABEL_35",
+ "36": "LABEL_36",
+ "37": "LABEL_37",
+ "38": "LABEL_38",
+ "39": "LABEL_39",
+ "40": "LABEL_40",
+ "41": "LABEL_41",
+ "42": "LABEL_42",
+ "43": "LABEL_43",
+ "44": "LABEL_44",
+ "45": "LABEL_45",
+ "46": "LABEL_46",
+ "47": "LABEL_47",
+ "48": "LABEL_48",
+ "49": "LABEL_49",
+ "50": "LABEL_50",
+ "51": "LABEL_51",
+ "52": "LABEL_52",
+ "53": "LABEL_53",
+ "54": "LABEL_54",
+ "55": "LABEL_55",
+ "56": "LABEL_56",
+ "57": "LABEL_57",
+ "58": "LABEL_58",
+ "59": "LABEL_59",
+ "60": "LABEL_60",
+ "61": "LABEL_61",
+ "62": "LABEL_62",
+ "63": "LABEL_63",
+ "64": "LABEL_64",
+ "65": "LABEL_65",
+ "66": "LABEL_66",
+ "67": "LABEL_67",
+ "68": "LABEL_68",
+ "69": "LABEL_69",
+ "70": "LABEL_70",
+ "71": "LABEL_71",
+ "72": "LABEL_72",
+ "73": "LABEL_73",
+ "74": "LABEL_74",
+ "75": "LABEL_75",
+ "76": "LABEL_76",
+ "77": "LABEL_77",
+ "78": "LABEL_78",
+ "79": "LABEL_79",
+ "80": "LABEL_80",
+ "81": "LABEL_81",
+ "82": "LABEL_82",
+ "83": "LABEL_83",
+ "84": "LABEL_84",
+ "85": "LABEL_85",
+ "86": "LABEL_86",
+ "87": "LABEL_87",
+ "88": "LABEL_88",
+ "89": "LABEL_89",
+ "90": "LABEL_90",
+ "91": "LABEL_91",
+ "92": "LABEL_92",
+ "93": "LABEL_93",
+ "94": "LABEL_94",
+ "95": "LABEL_95",
+ "96": "LABEL_96",
+ "97": "LABEL_97",
+ "98": "LABEL_98",
+ "99": "LABEL_99"
+ },
+ "init_std": 0.02,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_10": 10,
+ "LABEL_11": 11,
+ "LABEL_12": 12,
+ "LABEL_13": 13,
+ "LABEL_14": 14,
+ "LABEL_15": 15,
+ "LABEL_16": 16,
+ "LABEL_17": 17,
+ "LABEL_18": 18,
+ "LABEL_19": 19,
+ "LABEL_2": 2,
+ "LABEL_20": 20,
+ "LABEL_21": 21,
+ "LABEL_22": 22,
+ "LABEL_23": 23,
+ "LABEL_24": 24,
+ "LABEL_25": 25,
+ "LABEL_26": 26,
+ "LABEL_27": 27,
+ "LABEL_28": 28,
+ "LABEL_29": 29,
+ "LABEL_3": 3,
+ "LABEL_30": 30,
+ "LABEL_31": 31,
+ "LABEL_32": 32,
+ "LABEL_33": 33,
+ "LABEL_34": 34,
+ "LABEL_35": 35,
+ "LABEL_36": 36,
+ "LABEL_37": 37,
+ "LABEL_38": 38,
+ "LABEL_39": 39,
+ "LABEL_4": 4,
+ "LABEL_40": 40,
+ "LABEL_41": 41,
+ "LABEL_42": 42,
+ "LABEL_43": 43,
+ "LABEL_44": 44,
+ "LABEL_45": 45,
+ "LABEL_46": 46,
+ "LABEL_47": 47,
+ "LABEL_48": 48,
+ "LABEL_49": 49,
+ "LABEL_5": 5,
+ "LABEL_50": 50,
+ "LABEL_51": 51,
+ "LABEL_52": 52,
+ "LABEL_53": 53,
+ "LABEL_54": 54,
+ "LABEL_55": 55,
+ "LABEL_56": 56,
+ "LABEL_57": 57,
+ "LABEL_58": 58,
+ "LABEL_59": 59,
+ "LABEL_6": 6,
+ "LABEL_60": 60,
+ "LABEL_61": 61,
+ "LABEL_62": 62,
+ "LABEL_63": 63,
+ "LABEL_64": 64,
+ "LABEL_65": 65,
+ "LABEL_66": 66,
+ "LABEL_67": 67,
+ "LABEL_68": 68,
+ "LABEL_69": 69,
+ "LABEL_7": 7,
+ "LABEL_70": 70,
+ "LABEL_71": 71,
+ "LABEL_72": 72,
+ "LABEL_73": 73,
+ "LABEL_74": 74,
+ "LABEL_75": 75,
+ "LABEL_76": 76,
+ "LABEL_77": 77,
+ "LABEL_78": 78,
+ "LABEL_79": 79,
+ "LABEL_8": 8,
+ "LABEL_80": 80,
+ "LABEL_81": 81,
+ "LABEL_82": 82,
+ "LABEL_83": 83,
+ "LABEL_84": 84,
+ "LABEL_85": 85,
+ "LABEL_86": 86,
+ "LABEL_87": 87,
+ "LABEL_88": 88,
+ "LABEL_89": 89,
+ "LABEL_9": 9,
+ "LABEL_90": 90,
+ "LABEL_91": 91,
+ "LABEL_92": 92,
+ "LABEL_93": 93,
+ "LABEL_94": 94,
+ "LABEL_95": 95,
+ "LABEL_96": 96,
+ "LABEL_97": 97,
+ "LABEL_98": 98,
+ "LABEL_99": 99
+ },
+ "layer_norm_elementwise_affine": true,
+ "layerdrop": 0.0,
+ "max_position_embeddings": 2048,
+ "model_type": "opt",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "num_labels": 100,
+ "pad_token_id": 1,
+ "prefix": "</s>",
+ "torch_dtype": "float16",
+ "transformers_version": "4.51.1",
+ "use_cache": true,
+ "vocab_size": 50272,
+ "word_embed_proj_dim": 768
+ }
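The checkpoint is a 100-way OPTForSequenceClassification head, one class per length bucket. A minimal sketch of loading it and mapping the predicted bucket to a response-length estimate; the tokenizer choice and the bucket-midpoint mapping are assumptions, not part of this upload:

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned"
# No tokenizer files are included in this upload; facebook/opt-125m is an assumption
# consistent with the OPT config above (vocab_size 50272).
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
model = AutoModelForSequenceClassification.from_pretrained(ckpt)  # 100 labels
model.eval()

inputs = tokenizer("Explain how attention works.", return_tensors="pt",
                   truncation=True, max_length=2048)
with torch.no_grad():
    bucket = model(**inputs).logits.argmax(dim=-1).item()

# Equal-width buckets of 82 tokens: use the bucket midpoint as the length estimate.
print(bucket, (bucket + 0.5) * 82)
```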
opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2eaf3275bb8147750927ae09423e4205df153b4b7d433e1988aca71c07d96e0b
+ size 250654656
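The .safetensors entry is a git-lfs pointer; the roughly 250 MB weight blob itself lives in LFS storage. Downloading through huggingface_hub resolves the pointer to the real file (repo id again a placeholder):

```python
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="Brookseeworld/opt-125m-length-predictors",  # placeholder, not confirmed
    filename="opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors",
)
print(path)  # local path to the actual weights, not the 3-line pointer
```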
opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json ADDED
@@ -0,0 +1 @@
+ {"model": {"pred_model": "/root/autodl-pub/models/opt-125", "num_labels": 100, "mtype": "class", "activation": null, "path": "MODELS/opt-125m-gemma27b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned", "max_length": 2048, "max_batch_size": 1000}}
opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "evaluate_metrics": {
+ "tau": 0.6106830845300822,
+ "exact_match_acc": 0.3485,
+ "off_by_1_acc": 0.68225,
+ "off_by_2_acc": 0.83925,
+ "bin_mse": 3.72975,
+ "length_mse": 24480.21075,
+ "length_rmse": 156.46153121454486
+ },
+ "test_metrics": {
+ "epoch": 3,
+ "tau": 0.5806738482082854,
+ "exact_match_acc": 0.3235,
+ "off_by_1_acc": 0.671,
+ "off_by_2_acc": 0.82375,
+ "bin_mse": 3.93875,
+ "length_mse": 25859.8075,
+ "length_rmse": 160.80984888992342
+ }
+ }
opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json ADDED
@@ -0,0 +1,235 @@
+ {
+ "_remove_final_layer_norm": false,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "architectures": [
+ "OPTForSequenceClassification"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 2,
+ "do_layer_norm_before": true,
+ "dropout": 0.1,
+ "enable_bias": true,
+ "eos_token_id": 2,
+ "ffn_dim": 3072,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3",
+ "4": "LABEL_4",
+ "5": "LABEL_5",
+ "6": "LABEL_6",
+ "7": "LABEL_7",
+ "8": "LABEL_8",
+ "9": "LABEL_9",
+ "10": "LABEL_10",
+ "11": "LABEL_11",
+ "12": "LABEL_12",
+ "13": "LABEL_13",
+ "14": "LABEL_14",
+ "15": "LABEL_15",
+ "16": "LABEL_16",
+ "17": "LABEL_17",
+ "18": "LABEL_18",
+ "19": "LABEL_19",
+ "20": "LABEL_20",
+ "21": "LABEL_21",
+ "22": "LABEL_22",
+ "23": "LABEL_23",
+ "24": "LABEL_24",
+ "25": "LABEL_25",
+ "26": "LABEL_26",
+ "27": "LABEL_27",
+ "28": "LABEL_28",
+ "29": "LABEL_29",
+ "30": "LABEL_30",
+ "31": "LABEL_31",
+ "32": "LABEL_32",
+ "33": "LABEL_33",
+ "34": "LABEL_34",
+ "35": "LABEL_35",
+ "36": "LABEL_36",
+ "37": "LABEL_37",
+ "38": "LABEL_38",
+ "39": "LABEL_39",
+ "40": "LABEL_40",
+ "41": "LABEL_41",
+ "42": "LABEL_42",
+ "43": "LABEL_43",
+ "44": "LABEL_44",
+ "45": "LABEL_45",
+ "46": "LABEL_46",
+ "47": "LABEL_47",
+ "48": "LABEL_48",
+ "49": "LABEL_49",
+ "50": "LABEL_50",
+ "51": "LABEL_51",
+ "52": "LABEL_52",
+ "53": "LABEL_53",
+ "54": "LABEL_54",
+ "55": "LABEL_55",
+ "56": "LABEL_56",
+ "57": "LABEL_57",
+ "58": "LABEL_58",
+ "59": "LABEL_59",
+ "60": "LABEL_60",
+ "61": "LABEL_61",
+ "62": "LABEL_62",
+ "63": "LABEL_63",
+ "64": "LABEL_64",
+ "65": "LABEL_65",
+ "66": "LABEL_66",
+ "67": "LABEL_67",
+ "68": "LABEL_68",
+ "69": "LABEL_69",
+ "70": "LABEL_70",
+ "71": "LABEL_71",
+ "72": "LABEL_72",
+ "73": "LABEL_73",
+ "74": "LABEL_74",
+ "75": "LABEL_75",
+ "76": "LABEL_76",
+ "77": "LABEL_77",
+ "78": "LABEL_78",
+ "79": "LABEL_79",
+ "80": "LABEL_80",
+ "81": "LABEL_81",
+ "82": "LABEL_82",
+ "83": "LABEL_83",
+ "84": "LABEL_84",
+ "85": "LABEL_85",
+ "86": "LABEL_86",
+ "87": "LABEL_87",
+ "88": "LABEL_88",
+ "89": "LABEL_89",
+ "90": "LABEL_90",
+ "91": "LABEL_91",
+ "92": "LABEL_92",
+ "93": "LABEL_93",
+ "94": "LABEL_94",
+ "95": "LABEL_95",
+ "96": "LABEL_96",
+ "97": "LABEL_97",
+ "98": "LABEL_98",
+ "99": "LABEL_99"
+ },
+ "init_std": 0.02,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_10": 10,
+ "LABEL_11": 11,
+ "LABEL_12": 12,
+ "LABEL_13": 13,
+ "LABEL_14": 14,
+ "LABEL_15": 15,
+ "LABEL_16": 16,
+ "LABEL_17": 17,
+ "LABEL_18": 18,
+ "LABEL_19": 19,
+ "LABEL_2": 2,
+ "LABEL_20": 20,
+ "LABEL_21": 21,
+ "LABEL_22": 22,
+ "LABEL_23": 23,
+ "LABEL_24": 24,
+ "LABEL_25": 25,
+ "LABEL_26": 26,
+ "LABEL_27": 27,
+ "LABEL_28": 28,
+ "LABEL_29": 29,
+ "LABEL_3": 3,
+ "LABEL_30": 30,
+ "LABEL_31": 31,
+ "LABEL_32": 32,
+ "LABEL_33": 33,
+ "LABEL_34": 34,
+ "LABEL_35": 35,
+ "LABEL_36": 36,
+ "LABEL_37": 37,
+ "LABEL_38": 38,
+ "LABEL_39": 39,
+ "LABEL_4": 4,
+ "LABEL_40": 40,
+ "LABEL_41": 41,
+ "LABEL_42": 42,
+ "LABEL_43": 43,
+ "LABEL_44": 44,
+ "LABEL_45": 45,
+ "LABEL_46": 46,
+ "LABEL_47": 47,
+ "LABEL_48": 48,
+ "LABEL_49": 49,
+ "LABEL_5": 5,
+ "LABEL_50": 50,
+ "LABEL_51": 51,
+ "LABEL_52": 52,
+ "LABEL_53": 53,
+ "LABEL_54": 54,
+ "LABEL_55": 55,
+ "LABEL_56": 56,
+ "LABEL_57": 57,
+ "LABEL_58": 58,
+ "LABEL_59": 59,
+ "LABEL_6": 6,
+ "LABEL_60": 60,
+ "LABEL_61": 61,
+ "LABEL_62": 62,
+ "LABEL_63": 63,
+ "LABEL_64": 64,
+ "LABEL_65": 65,
+ "LABEL_66": 66,
+ "LABEL_67": 67,
+ "LABEL_68": 68,
+ "LABEL_69": 69,
+ "LABEL_7": 7,
+ "LABEL_70": 70,
+ "LABEL_71": 71,
+ "LABEL_72": 72,
+ "LABEL_73": 73,
+ "LABEL_74": 74,
+ "LABEL_75": 75,
+ "LABEL_76": 76,
+ "LABEL_77": 77,
+ "LABEL_78": 78,
+ "LABEL_79": 79,
+ "LABEL_8": 8,
+ "LABEL_80": 80,
+ "LABEL_81": 81,
+ "LABEL_82": 82,
+ "LABEL_83": 83,
+ "LABEL_84": 84,
+ "LABEL_85": 85,
+ "LABEL_86": 86,
+ "LABEL_87": 87,
+ "LABEL_88": 88,
+ "LABEL_89": 89,
+ "LABEL_9": 9,
+ "LABEL_90": 90,
+ "LABEL_91": 91,
+ "LABEL_92": 92,
+ "LABEL_93": 93,
+ "LABEL_94": 94,
+ "LABEL_95": 95,
+ "LABEL_96": 96,
+ "LABEL_97": 97,
+ "LABEL_98": 98,
+ "LABEL_99": 99
+ },
+ "layer_norm_elementwise_affine": true,
+ "layerdrop": 0.0,
+ "max_position_embeddings": 2048,
+ "model_type": "opt",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "num_labels": 100,
+ "pad_token_id": 1,
+ "prefix": "</s>",
+ "torch_dtype": "float16",
+ "transformers_version": "4.51.1",
+ "use_cache": true,
+ "vocab_size": 50272,
+ "word_embed_proj_dim": 768
+ }
opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a0227dee7ef57185523a3c32f81f02523afeab1119bc507bd03b4384d99cca6
+ size 250654656
opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json ADDED
@@ -0,0 +1 @@
+ {"model": {"pred_model": "/root/autodl-pub/models/opt-125", "num_labels": 100, "mtype": "class", "activation": null, "path": "MODELS/opt-125m-gemma27b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned", "max_length": 2048, "max_batch_size": 1000}}
opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "evaluate_metrics": {
+ "tau": 0.6134366600990357,
+ "exact_match_acc": 0.412,
+ "off_by_1_acc": 0.67125,
+ "off_by_2_acc": 0.8095,
+ "bin_mse": 5.74025,
+ "length_mse": 38508.7955,
+ "length_rmse": 196.23658043290501
+ },
+ "test_metrics": {
+ "epoch": 3,
+ "tau": 0.6164650804782883,
+ "exact_match_acc": 0.4105,
+ "off_by_1_acc": 0.67225,
+ "off_by_2_acc": 0.80225,
+ "bin_mse": 5.28675,
+ "length_mse": 35531.2655,
+ "length_rmse": 188.4973885760755
+ }
+ }
opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json ADDED
@@ -0,0 +1,235 @@
+ {
+ "_remove_final_layer_norm": false,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "architectures": [
+ "OPTForSequenceClassification"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 2,
+ "do_layer_norm_before": true,
+ "dropout": 0.1,
+ "enable_bias": true,
+ "eos_token_id": 2,
+ "ffn_dim": 3072,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3",
+ "4": "LABEL_4",
+ "5": "LABEL_5",
+ "6": "LABEL_6",
+ "7": "LABEL_7",
+ "8": "LABEL_8",
+ "9": "LABEL_9",
+ "10": "LABEL_10",
+ "11": "LABEL_11",
+ "12": "LABEL_12",
+ "13": "LABEL_13",
+ "14": "LABEL_14",
+ "15": "LABEL_15",
+ "16": "LABEL_16",
+ "17": "LABEL_17",
+ "18": "LABEL_18",
+ "19": "LABEL_19",
+ "20": "LABEL_20",
+ "21": "LABEL_21",
+ "22": "LABEL_22",
+ "23": "LABEL_23",
+ "24": "LABEL_24",
+ "25": "LABEL_25",
+ "26": "LABEL_26",
+ "27": "LABEL_27",
+ "28": "LABEL_28",
+ "29": "LABEL_29",
+ "30": "LABEL_30",
+ "31": "LABEL_31",
+ "32": "LABEL_32",
+ "33": "LABEL_33",
+ "34": "LABEL_34",
+ "35": "LABEL_35",
+ "36": "LABEL_36",
+ "37": "LABEL_37",
+ "38": "LABEL_38",
+ "39": "LABEL_39",
+ "40": "LABEL_40",
+ "41": "LABEL_41",
+ "42": "LABEL_42",
+ "43": "LABEL_43",
+ "44": "LABEL_44",
+ "45": "LABEL_45",
+ "46": "LABEL_46",
+ "47": "LABEL_47",
+ "48": "LABEL_48",
+ "49": "LABEL_49",
+ "50": "LABEL_50",
+ "51": "LABEL_51",
+ "52": "LABEL_52",
+ "53": "LABEL_53",
+ "54": "LABEL_54",
+ "55": "LABEL_55",
+ "56": "LABEL_56",
+ "57": "LABEL_57",
+ "58": "LABEL_58",
+ "59": "LABEL_59",
+ "60": "LABEL_60",
+ "61": "LABEL_61",
+ "62": "LABEL_62",
+ "63": "LABEL_63",
+ "64": "LABEL_64",
+ "65": "LABEL_65",
+ "66": "LABEL_66",
+ "67": "LABEL_67",
+ "68": "LABEL_68",
+ "69": "LABEL_69",
+ "70": "LABEL_70",
+ "71": "LABEL_71",
+ "72": "LABEL_72",
+ "73": "LABEL_73",
+ "74": "LABEL_74",
+ "75": "LABEL_75",
+ "76": "LABEL_76",
+ "77": "LABEL_77",
+ "78": "LABEL_78",
+ "79": "LABEL_79",
+ "80": "LABEL_80",
+ "81": "LABEL_81",
+ "82": "LABEL_82",
+ "83": "LABEL_83",
+ "84": "LABEL_84",
+ "85": "LABEL_85",
+ "86": "LABEL_86",
+ "87": "LABEL_87",
+ "88": "LABEL_88",
+ "89": "LABEL_89",
+ "90": "LABEL_90",
+ "91": "LABEL_91",
+ "92": "LABEL_92",
+ "93": "LABEL_93",
+ "94": "LABEL_94",
+ "95": "LABEL_95",
+ "96": "LABEL_96",
+ "97": "LABEL_97",
+ "98": "LABEL_98",
+ "99": "LABEL_99"
+ },
+ "init_std": 0.02,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_10": 10,
+ "LABEL_11": 11,
+ "LABEL_12": 12,
+ "LABEL_13": 13,
+ "LABEL_14": 14,
+ "LABEL_15": 15,
+ "LABEL_16": 16,
+ "LABEL_17": 17,
+ "LABEL_18": 18,
+ "LABEL_19": 19,
+ "LABEL_2": 2,
+ "LABEL_20": 20,
+ "LABEL_21": 21,
+ "LABEL_22": 22,
+ "LABEL_23": 23,
+ "LABEL_24": 24,
+ "LABEL_25": 25,
+ "LABEL_26": 26,
+ "LABEL_27": 27,
+ "LABEL_28": 28,
+ "LABEL_29": 29,
+ "LABEL_3": 3,
+ "LABEL_30": 30,
+ "LABEL_31": 31,
+ "LABEL_32": 32,
+ "LABEL_33": 33,
+ "LABEL_34": 34,
+ "LABEL_35": 35,
+ "LABEL_36": 36,
+ "LABEL_37": 37,
+ "LABEL_38": 38,
+ "LABEL_39": 39,
+ "LABEL_4": 4,
+ "LABEL_40": 40,
+ "LABEL_41": 41,
+ "LABEL_42": 42,
+ "LABEL_43": 43,
+ "LABEL_44": 44,
+ "LABEL_45": 45,
+ "LABEL_46": 46,
+ "LABEL_47": 47,
+ "LABEL_48": 48,
+ "LABEL_49": 49,
+ "LABEL_5": 5,
+ "LABEL_50": 50,
+ "LABEL_51": 51,
+ "LABEL_52": 52,
+ "LABEL_53": 53,
+ "LABEL_54": 54,
+ "LABEL_55": 55,
+ "LABEL_56": 56,
+ "LABEL_57": 57,
+ "LABEL_58": 58,
+ "LABEL_59": 59,
+ "LABEL_6": 6,
+ "LABEL_60": 60,
+ "LABEL_61": 61,
+ "LABEL_62": 62,
+ "LABEL_63": 63,
+ "LABEL_64": 64,
+ "LABEL_65": 65,
+ "LABEL_66": 66,
+ "LABEL_67": 67,
+ "LABEL_68": 68,
+ "LABEL_69": 69,
+ "LABEL_7": 7,
+ "LABEL_70": 70,
+ "LABEL_71": 71,
+ "LABEL_72": 72,
+ "LABEL_73": 73,
+ "LABEL_74": 74,
+ "LABEL_75": 75,
+ "LABEL_76": 76,
+ "LABEL_77": 77,
+ "LABEL_78": 78,
+ "LABEL_79": 79,
+ "LABEL_8": 8,
+ "LABEL_80": 80,
+ "LABEL_81": 81,
+ "LABEL_82": 82,
+ "LABEL_83": 83,
+ "LABEL_84": 84,
+ "LABEL_85": 85,
+ "LABEL_86": 86,
+ "LABEL_87": 87,
+ "LABEL_88": 88,
+ "LABEL_89": 89,
+ "LABEL_9": 9,
+ "LABEL_90": 90,
+ "LABEL_91": 91,
+ "LABEL_92": 92,
+ "LABEL_93": 93,
+ "LABEL_94": 94,
+ "LABEL_95": 95,
+ "LABEL_96": 96,
+ "LABEL_97": 97,
+ "LABEL_98": 98,
+ "LABEL_99": 99
+ },
+ "layer_norm_elementwise_affine": true,
+ "layerdrop": 0.0,
+ "max_position_embeddings": 2048,
+ "model_type": "opt",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "num_labels": 100,
+ "pad_token_id": 1,
+ "prefix": "</s>",
+ "torch_dtype": "float16",
+ "transformers_version": "4.51.1",
+ "use_cache": true,
+ "vocab_size": 50272,
+ "word_embed_proj_dim": 768
+ }
opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ffc36104cc2d8a91a751be209485022bd6e9a794b10966389fd45c8173088e4
+ size 250654656
opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json ADDED
@@ -0,0 +1 @@
+ {"model": {"pred_model": "/root/autodl-pub/models/opt-125", "num_labels": 100, "mtype": "class", "activation": null, "path": "MODELS/opt-125m-llama8b-lmsys-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned", "max_length": 2048, "max_batch_size": 1000}}
opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/best_metrics.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "evaluate_metrics": {
+ "tau": 0.5364260134412844,
+ "exact_match_acc": 0.27,
+ "off_by_1_acc": 0.61325,
+ "off_by_2_acc": 0.79675,
+ "bin_mse": 5.788,
+ "length_mse": 38327.91075,
+ "length_rmse": 195.7751535563183
+ },
+ "test_metrics": {
+ "epoch": 3,
+ "tau": 0.5462135821376071,
+ "exact_match_acc": 0.2745,
+ "off_by_1_acc": 0.60625,
+ "off_by_2_acc": 0.7995,
+ "bin_mse": 6.9775,
+ "length_mse": 46647.76825,
+ "length_rmse": 215.98094418258293
+ }
+ }
opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/config.json ADDED
@@ -0,0 +1,235 @@
+ {
+ "_remove_final_layer_norm": false,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "architectures": [
+ "OPTForSequenceClassification"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 2,
+ "do_layer_norm_before": true,
+ "dropout": 0.1,
+ "enable_bias": true,
+ "eos_token_id": 2,
+ "ffn_dim": 3072,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3",
+ "4": "LABEL_4",
+ "5": "LABEL_5",
+ "6": "LABEL_6",
+ "7": "LABEL_7",
+ "8": "LABEL_8",
+ "9": "LABEL_9",
+ "10": "LABEL_10",
+ "11": "LABEL_11",
+ "12": "LABEL_12",
+ "13": "LABEL_13",
+ "14": "LABEL_14",
+ "15": "LABEL_15",
+ "16": "LABEL_16",
+ "17": "LABEL_17",
+ "18": "LABEL_18",
+ "19": "LABEL_19",
+ "20": "LABEL_20",
+ "21": "LABEL_21",
+ "22": "LABEL_22",
+ "23": "LABEL_23",
+ "24": "LABEL_24",
+ "25": "LABEL_25",
+ "26": "LABEL_26",
+ "27": "LABEL_27",
+ "28": "LABEL_28",
+ "29": "LABEL_29",
+ "30": "LABEL_30",
+ "31": "LABEL_31",
+ "32": "LABEL_32",
+ "33": "LABEL_33",
+ "34": "LABEL_34",
+ "35": "LABEL_35",
+ "36": "LABEL_36",
+ "37": "LABEL_37",
+ "38": "LABEL_38",
+ "39": "LABEL_39",
+ "40": "LABEL_40",
+ "41": "LABEL_41",
+ "42": "LABEL_42",
+ "43": "LABEL_43",
+ "44": "LABEL_44",
+ "45": "LABEL_45",
+ "46": "LABEL_46",
+ "47": "LABEL_47",
+ "48": "LABEL_48",
+ "49": "LABEL_49",
+ "50": "LABEL_50",
+ "51": "LABEL_51",
+ "52": "LABEL_52",
+ "53": "LABEL_53",
+ "54": "LABEL_54",
+ "55": "LABEL_55",
+ "56": "LABEL_56",
+ "57": "LABEL_57",
+ "58": "LABEL_58",
+ "59": "LABEL_59",
+ "60": "LABEL_60",
+ "61": "LABEL_61",
+ "62": "LABEL_62",
+ "63": "LABEL_63",
+ "64": "LABEL_64",
+ "65": "LABEL_65",
+ "66": "LABEL_66",
+ "67": "LABEL_67",
+ "68": "LABEL_68",
+ "69": "LABEL_69",
+ "70": "LABEL_70",
+ "71": "LABEL_71",
+ "72": "LABEL_72",
+ "73": "LABEL_73",
+ "74": "LABEL_74",
+ "75": "LABEL_75",
+ "76": "LABEL_76",
+ "77": "LABEL_77",
+ "78": "LABEL_78",
+ "79": "LABEL_79",
+ "80": "LABEL_80",
+ "81": "LABEL_81",
+ "82": "LABEL_82",
+ "83": "LABEL_83",
+ "84": "LABEL_84",
+ "85": "LABEL_85",
+ "86": "LABEL_86",
+ "87": "LABEL_87",
+ "88": "LABEL_88",
+ "89": "LABEL_89",
+ "90": "LABEL_90",
+ "91": "LABEL_91",
+ "92": "LABEL_92",
+ "93": "LABEL_93",
+ "94": "LABEL_94",
+ "95": "LABEL_95",
+ "96": "LABEL_96",
+ "97": "LABEL_97",
+ "98": "LABEL_98",
+ "99": "LABEL_99"
+ },
+ "init_std": 0.02,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_10": 10,
+ "LABEL_11": 11,
+ "LABEL_12": 12,
+ "LABEL_13": 13,
+ "LABEL_14": 14,
+ "LABEL_15": 15,
+ "LABEL_16": 16,
+ "LABEL_17": 17,
+ "LABEL_18": 18,
+ "LABEL_19": 19,
+ "LABEL_2": 2,
+ "LABEL_20": 20,
+ "LABEL_21": 21,
+ "LABEL_22": 22,
+ "LABEL_23": 23,
+ "LABEL_24": 24,
+ "LABEL_25": 25,
+ "LABEL_26": 26,
+ "LABEL_27": 27,
+ "LABEL_28": 28,
+ "LABEL_29": 29,
+ "LABEL_3": 3,
+ "LABEL_30": 30,
+ "LABEL_31": 31,
+ "LABEL_32": 32,
+ "LABEL_33": 33,
+ "LABEL_34": 34,
+ "LABEL_35": 35,
+ "LABEL_36": 36,
+ "LABEL_37": 37,
+ "LABEL_38": 38,
+ "LABEL_39": 39,
+ "LABEL_4": 4,
+ "LABEL_40": 40,
+ "LABEL_41": 41,
+ "LABEL_42": 42,
+ "LABEL_43": 43,
+ "LABEL_44": 44,
+ "LABEL_45": 45,
+ "LABEL_46": 46,
+ "LABEL_47": 47,
+ "LABEL_48": 48,
+ "LABEL_49": 49,
+ "LABEL_5": 5,
+ "LABEL_50": 50,
+ "LABEL_51": 51,
+ "LABEL_52": 52,
+ "LABEL_53": 53,
+ "LABEL_54": 54,
+ "LABEL_55": 55,
+ "LABEL_56": 56,
+ "LABEL_57": 57,
+ "LABEL_58": 58,
+ "LABEL_59": 59,
+ "LABEL_6": 6,
+ "LABEL_60": 60,
+ "LABEL_61": 61,
+ "LABEL_62": 62,
+ "LABEL_63": 63,
+ "LABEL_64": 64,
+ "LABEL_65": 65,
+ "LABEL_66": 66,
+ "LABEL_67": 67,
+ "LABEL_68": 68,
+ "LABEL_69": 69,
+ "LABEL_7": 7,
+ "LABEL_70": 70,
+ "LABEL_71": 71,
+ "LABEL_72": 72,
+ "LABEL_73": 73,
+ "LABEL_74": 74,
+ "LABEL_75": 75,
+ "LABEL_76": 76,
+ "LABEL_77": 77,
+ "LABEL_78": 78,
+ "LABEL_79": 79,
+ "LABEL_8": 8,
+ "LABEL_80": 80,
+ "LABEL_81": 81,
+ "LABEL_82": 82,
+ "LABEL_83": 83,
+ "LABEL_84": 84,
+ "LABEL_85": 85,
+ "LABEL_86": 86,
+ "LABEL_87": 87,
+ "LABEL_88": 88,
+ "LABEL_89": 89,
+ "LABEL_9": 9,
+ "LABEL_90": 90,
+ "LABEL_91": 91,
+ "LABEL_92": 92,
+ "LABEL_93": 93,
+ "LABEL_94": 94,
+ "LABEL_95": 95,
+ "LABEL_96": 96,
+ "LABEL_97": 97,
+ "LABEL_98": 98,
+ "LABEL_99": 99
+ },
+ "layer_norm_elementwise_affine": true,
+ "layerdrop": 0.0,
+ "max_position_embeddings": 2048,
+ "model_type": "opt",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "num_labels": 100,
+ "pad_token_id": 1,
+ "prefix": "</s>",
+ "torch_dtype": "float16",
+ "transformers_version": "4.51.1",
+ "use_cache": true,
+ "vocab_size": 50272,
+ "word_embed_proj_dim": 768
+ }
opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32735c057f8d9620c1bccaf83896dd7e7c73fb4e4a39691448929983b2022d49
+ size 250654656
opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/usage_config.json ADDED
@@ -0,0 +1 @@
+ {"model": {"pred_model": "/root/autodl-pub/models/opt-125", "num_labels": 100, "mtype": "class", "activation": null, "path": "MODELS/opt-125m-llama8b-sharegpt-equalwidth-numbucket100-bucketsize82-bs64-e8/finetuned", "max_length": 2048, "max_batch_size": 1000}}