Omartificial-Intelligence-Space committed on
Commit
3ccf048
·
verified ·
1 Parent(s): e9446d6

Upload 17 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/trainer_state-checkpoint.json ADDED
@@ -0,0 +1,782 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 5000,
7
+ "global_step": 84375,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.035555555555555556,
14
+ "grad_norm": 31.097620010375977,
15
+ "learning_rate": 5.91964920597298e-06,
16
+ "loss": 1.6025,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 0.07111111111111111,
21
+ "grad_norm": 15.004558563232422,
22
+ "learning_rate": 1.1845223986726713e-05,
23
+ "loss": 0.5208,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 0.10666666666666667,
28
+ "grad_norm": 18.05305290222168,
29
+ "learning_rate": 1.7770798767480447e-05,
30
+ "loss": 0.4374,
31
+ "step": 3000
32
+ },
33
+ {
34
+ "epoch": 0.14222222222222222,
35
+ "grad_norm": 15.441234588623047,
36
+ "learning_rate": 2.369637354823418e-05,
37
+ "loss": 0.4142,
38
+ "step": 4000
39
+ },
40
+ {
41
+ "epoch": 0.17777777777777778,
42
+ "grad_norm": 11.936789512634277,
43
+ "learning_rate": 2.9621948328987915e-05,
44
+ "loss": 0.3916,
45
+ "step": 5000
46
+ },
47
+ {
48
+ "epoch": 0.17777777777777778,
49
+ "eval_loss": 0.3775472640991211,
50
+ "eval_runtime": 11.6508,
51
+ "eval_samples_per_second": 85.831,
52
+ "eval_steps_per_second": 2.747,
53
+ "eval_sts-dev_pearson_cosine": 0.8150497373285317,
54
+ "eval_sts-dev_spearman_cosine": 0.8196131231126629,
55
+ "step": 5000
56
+ },
57
+ {
58
+ "epoch": 0.21333333333333335,
59
+ "grad_norm": 12.518898963928223,
60
+ "learning_rate": 3.554752310974165e-05,
61
+ "loss": 0.382,
62
+ "step": 6000
63
+ },
64
+ {
65
+ "epoch": 0.24888888888888888,
66
+ "grad_norm": 9.117196083068848,
67
+ "learning_rate": 4.147309789049538e-05,
68
+ "loss": 0.3824,
69
+ "step": 7000
70
+ },
71
+ {
72
+ "epoch": 0.28444444444444444,
73
+ "grad_norm": 9.207646369934082,
74
+ "learning_rate": 4.739867267124911e-05,
75
+ "loss": 0.3744,
76
+ "step": 8000
77
+ },
78
+ {
79
+ "epoch": 0.32,
80
+ "grad_norm": 8.699370384216309,
81
+ "learning_rate": 4.9630614851785036e-05,
82
+ "loss": 0.3781,
83
+ "step": 9000
84
+ },
85
+ {
86
+ "epoch": 0.35555555555555557,
87
+ "grad_norm": 5.3027729988098145,
88
+ "learning_rate": 4.897217430238224e-05,
89
+ "loss": 0.367,
90
+ "step": 10000
91
+ },
92
+ {
93
+ "epoch": 0.35555555555555557,
94
+ "eval_loss": 0.37576642632484436,
95
+ "eval_runtime": 35.1214,
96
+ "eval_samples_per_second": 28.473,
97
+ "eval_steps_per_second": 0.911,
98
+ "eval_sts-dev_pearson_cosine": 0.8076134919907783,
99
+ "eval_sts-dev_spearman_cosine": 0.8136231582786481,
100
+ "step": 10000
101
+ },
102
+ {
103
+ "epoch": 0.39111111111111113,
104
+ "grad_norm": 13.608588218688965,
105
+ "learning_rate": 4.831373375297945e-05,
106
+ "loss": 0.3527,
107
+ "step": 11000
108
+ },
109
+ {
110
+ "epoch": 0.4266666666666667,
111
+ "grad_norm": 2.712616443634033,
112
+ "learning_rate": 4.765529320357665e-05,
113
+ "loss": 0.3354,
114
+ "step": 12000
115
+ },
116
+ {
117
+ "epoch": 0.4622222222222222,
118
+ "grad_norm": 7.297984600067139,
119
+ "learning_rate": 4.699685265417386e-05,
120
+ "loss": 0.3147,
121
+ "step": 13000
122
+ },
123
+ {
124
+ "epoch": 0.49777777777777776,
125
+ "grad_norm": 3.9520838260650635,
126
+ "learning_rate": 4.633841210477106e-05,
127
+ "loss": 0.3084,
128
+ "step": 14000
129
+ },
130
+ {
131
+ "epoch": 0.5333333333333333,
132
+ "grad_norm": 3.8519201278686523,
133
+ "learning_rate": 4.567997155536827e-05,
134
+ "loss": 0.2975,
135
+ "step": 15000
136
+ },
137
+ {
138
+ "epoch": 0.5333333333333333,
139
+ "eval_loss": 0.35147717595100403,
140
+ "eval_runtime": 30.6971,
141
+ "eval_samples_per_second": 32.576,
142
+ "eval_steps_per_second": 1.042,
143
+ "eval_sts-dev_pearson_cosine": 0.8128354003998368,
144
+ "eval_sts-dev_spearman_cosine": 0.8205304242394997,
145
+ "step": 15000
146
+ },
147
+ {
148
+ "epoch": 0.5688888888888889,
149
+ "grad_norm": 7.860720634460449,
150
+ "learning_rate": 4.502153100596547e-05,
151
+ "loss": 0.2978,
152
+ "step": 16000
153
+ },
154
+ {
155
+ "epoch": 0.6044444444444445,
156
+ "grad_norm": 7.748737335205078,
157
+ "learning_rate": 4.436309045656268e-05,
158
+ "loss": 0.2916,
159
+ "step": 17000
160
+ },
161
+ {
162
+ "epoch": 0.64,
163
+ "grad_norm": 8.615702629089355,
164
+ "learning_rate": 4.370464990715988e-05,
165
+ "loss": 0.2792,
166
+ "step": 18000
167
+ },
168
+ {
169
+ "epoch": 0.6755555555555556,
170
+ "grad_norm": 5.2580156326293945,
171
+ "learning_rate": 4.304620935775709e-05,
172
+ "loss": 0.2661,
173
+ "step": 19000
174
+ },
175
+ {
176
+ "epoch": 0.7111111111111111,
177
+ "grad_norm": 9.318315505981445,
178
+ "learning_rate": 4.238776880835429e-05,
179
+ "loss": 0.2583,
180
+ "step": 20000
181
+ },
182
+ {
183
+ "epoch": 0.7111111111111111,
184
+ "eval_loss": 0.3185396194458008,
185
+ "eval_runtime": 30.8313,
186
+ "eval_samples_per_second": 32.435,
187
+ "eval_steps_per_second": 1.038,
188
+ "eval_sts-dev_pearson_cosine": 0.8058349279884176,
189
+ "eval_sts-dev_spearman_cosine": 0.8159206996876398,
190
+ "step": 20000
191
+ },
192
+ {
193
+ "epoch": 0.7466666666666667,
194
+ "grad_norm": 4.720574855804443,
195
+ "learning_rate": 4.17293282589515e-05,
196
+ "loss": 0.2529,
197
+ "step": 21000
198
+ },
199
+ {
200
+ "epoch": 0.7822222222222223,
201
+ "grad_norm": 6.6863484382629395,
202
+ "learning_rate": 4.10708877095487e-05,
203
+ "loss": 0.2505,
204
+ "step": 22000
205
+ },
206
+ {
207
+ "epoch": 0.8177777777777778,
208
+ "grad_norm": 3.1214375495910645,
209
+ "learning_rate": 4.041244716014591e-05,
210
+ "loss": 0.2454,
211
+ "step": 23000
212
+ },
213
+ {
214
+ "epoch": 0.8533333333333334,
215
+ "grad_norm": 0.6488115787506104,
216
+ "learning_rate": 3.9754006610743114e-05,
217
+ "loss": 0.242,
218
+ "step": 24000
219
+ },
220
+ {
221
+ "epoch": 0.8888888888888888,
222
+ "grad_norm": 5.3746113777160645,
223
+ "learning_rate": 3.909556606134032e-05,
224
+ "loss": 0.2307,
225
+ "step": 25000
226
+ },
227
+ {
228
+ "epoch": 0.8888888888888888,
229
+ "eval_loss": 0.28817781805992126,
230
+ "eval_runtime": 31.3269,
231
+ "eval_samples_per_second": 31.921,
232
+ "eval_steps_per_second": 1.021,
233
+ "eval_sts-dev_pearson_cosine": 0.8068056591060926,
234
+ "eval_sts-dev_spearman_cosine": 0.8175004911072571,
235
+ "step": 25000
236
+ },
237
+ {
238
+ "epoch": 0.9244444444444444,
239
+ "grad_norm": 7.79231595993042,
240
+ "learning_rate": 3.8437125511937525e-05,
241
+ "loss": 0.2349,
242
+ "step": 26000
243
+ },
244
+ {
245
+ "epoch": 0.96,
246
+ "grad_norm": 9.042169570922852,
247
+ "learning_rate": 3.7778684962534734e-05,
248
+ "loss": 0.2238,
249
+ "step": 27000
250
+ },
251
+ {
252
+ "epoch": 0.9955555555555555,
253
+ "grad_norm": 10.183342933654785,
254
+ "learning_rate": 3.7120244413131936e-05,
255
+ "loss": 0.2132,
256
+ "step": 28000
257
+ },
258
+ {
259
+ "epoch": 1.031111111111111,
260
+ "grad_norm": 1.6765131950378418,
261
+ "learning_rate": 3.6461803863729144e-05,
262
+ "loss": 0.1601,
263
+ "step": 29000
264
+ },
265
+ {
266
+ "epoch": 1.0666666666666667,
267
+ "grad_norm": 2.5507757663726807,
268
+ "learning_rate": 3.5803363314326346e-05,
269
+ "loss": 0.1581,
270
+ "step": 30000
271
+ },
272
+ {
273
+ "epoch": 1.0666666666666667,
274
+ "eval_loss": 0.2580932080745697,
275
+ "eval_runtime": 31.0615,
276
+ "eval_samples_per_second": 32.194,
277
+ "eval_steps_per_second": 1.03,
278
+ "eval_sts-dev_pearson_cosine": 0.8157474163754647,
279
+ "eval_sts-dev_spearman_cosine": 0.8222299144499523,
280
+ "step": 30000
281
+ },
282
+ {
283
+ "epoch": 1.1022222222222222,
284
+ "grad_norm": 2.3133509159088135,
285
+ "learning_rate": 3.5144922764923555e-05,
286
+ "loss": 0.1532,
287
+ "step": 31000
288
+ },
289
+ {
290
+ "epoch": 1.1377777777777778,
291
+ "grad_norm": 2.026036262512207,
292
+ "learning_rate": 3.448648221552076e-05,
293
+ "loss": 0.1494,
294
+ "step": 32000
295
+ },
296
+ {
297
+ "epoch": 1.1733333333333333,
298
+ "grad_norm": 5.352447986602783,
299
+ "learning_rate": 3.3828041666117966e-05,
300
+ "loss": 0.1484,
301
+ "step": 33000
302
+ },
303
+ {
304
+ "epoch": 1.208888888888889,
305
+ "grad_norm": 2.9670166969299316,
306
+ "learning_rate": 3.316960111671517e-05,
307
+ "loss": 0.1529,
308
+ "step": 34000
309
+ },
310
+ {
311
+ "epoch": 1.2444444444444445,
312
+ "grad_norm": 6.357540607452393,
313
+ "learning_rate": 3.251116056731238e-05,
314
+ "loss": 0.1467,
315
+ "step": 35000
316
+ },
317
+ {
318
+ "epoch": 1.2444444444444445,
319
+ "eval_loss": 0.24440895020961761,
320
+ "eval_runtime": 31.2456,
321
+ "eval_samples_per_second": 32.005,
322
+ "eval_steps_per_second": 1.024,
323
+ "eval_sts-dev_pearson_cosine": 0.814902241190906,
324
+ "eval_sts-dev_spearman_cosine": 0.8211414049575237,
325
+ "step": 35000
326
+ },
327
+ {
328
+ "epoch": 1.28,
329
+ "grad_norm": 6.782201766967773,
330
+ "learning_rate": 3.185272001790958e-05,
331
+ "loss": 0.1522,
332
+ "step": 36000
333
+ },
334
+ {
335
+ "epoch": 1.3155555555555556,
336
+ "grad_norm": 0.9291681051254272,
337
+ "learning_rate": 3.119427946850679e-05,
338
+ "loss": 0.1412,
339
+ "step": 37000
340
+ },
341
+ {
342
+ "epoch": 1.3511111111111112,
343
+ "grad_norm": 6.588221073150635,
344
+ "learning_rate": 3.053583891910399e-05,
345
+ "loss": 0.1416,
346
+ "step": 38000
347
+ },
348
+ {
349
+ "epoch": 1.3866666666666667,
350
+ "grad_norm": 7.346938133239746,
351
+ "learning_rate": 2.9877398369701205e-05,
352
+ "loss": 0.1393,
353
+ "step": 39000
354
+ },
355
+ {
356
+ "epoch": 1.4222222222222223,
357
+ "grad_norm": 7.207787990570068,
358
+ "learning_rate": 2.921895782029841e-05,
359
+ "loss": 0.1389,
360
+ "step": 40000
361
+ },
362
+ {
363
+ "epoch": 1.4222222222222223,
364
+ "eval_loss": 0.2562263309955597,
365
+ "eval_runtime": 31.2811,
366
+ "eval_samples_per_second": 31.968,
367
+ "eval_steps_per_second": 1.023,
368
+ "eval_sts-dev_pearson_cosine": 0.8227387520545976,
369
+ "eval_sts-dev_spearman_cosine": 0.826905561534745,
370
+ "step": 40000
371
+ },
372
+ {
373
+ "epoch": 1.4577777777777778,
374
+ "grad_norm": 2.1179184913635254,
375
+ "learning_rate": 2.8560517270895616e-05,
376
+ "loss": 0.1353,
377
+ "step": 41000
378
+ },
379
+ {
380
+ "epoch": 1.4933333333333334,
381
+ "grad_norm": 2.6916444301605225,
382
+ "learning_rate": 2.790207672149282e-05,
383
+ "loss": 0.1284,
384
+ "step": 42000
385
+ },
386
+ {
387
+ "epoch": 1.528888888888889,
388
+ "grad_norm": 9.770589828491211,
389
+ "learning_rate": 2.7243636172090027e-05,
390
+ "loss": 0.1317,
391
+ "step": 43000
392
+ },
393
+ {
394
+ "epoch": 1.5644444444444443,
395
+ "grad_norm": 2.033369779586792,
396
+ "learning_rate": 2.6585195622687232e-05,
397
+ "loss": 0.1242,
398
+ "step": 44000
399
+ },
400
+ {
401
+ "epoch": 1.6,
402
+ "grad_norm": 9.705333709716797,
403
+ "learning_rate": 2.5926755073284438e-05,
404
+ "loss": 0.1228,
405
+ "step": 45000
406
+ },
407
+ {
408
+ "epoch": 1.6,
409
+ "eval_loss": 0.22733546793460846,
410
+ "eval_runtime": 30.7521,
411
+ "eval_samples_per_second": 32.518,
412
+ "eval_steps_per_second": 1.041,
413
+ "eval_sts-dev_pearson_cosine": 0.8129372102759819,
414
+ "eval_sts-dev_spearman_cosine": 0.8243194932998689,
415
+ "step": 45000
416
+ },
417
+ {
418
+ "epoch": 1.6355555555555554,
419
+ "grad_norm": 6.558931827545166,
420
+ "learning_rate": 2.5268314523881643e-05,
421
+ "loss": 0.1308,
422
+ "step": 46000
423
+ },
424
+ {
425
+ "epoch": 1.6711111111111112,
426
+ "grad_norm": 2.8348495960235596,
427
+ "learning_rate": 2.4609873974478845e-05,
428
+ "loss": 0.1231,
429
+ "step": 47000
430
+ },
431
+ {
432
+ "epoch": 1.7066666666666666,
433
+ "grad_norm": 10.423678398132324,
434
+ "learning_rate": 2.395143342507605e-05,
435
+ "loss": 0.1196,
436
+ "step": 48000
437
+ },
438
+ {
439
+ "epoch": 1.7422222222222223,
440
+ "grad_norm": 4.737322807312012,
441
+ "learning_rate": 2.3292992875673256e-05,
442
+ "loss": 0.1202,
443
+ "step": 49000
444
+ },
445
+ {
446
+ "epoch": 1.7777777777777777,
447
+ "grad_norm": 8.491903305053711,
448
+ "learning_rate": 2.263455232627046e-05,
449
+ "loss": 0.12,
450
+ "step": 50000
451
+ },
452
+ {
453
+ "epoch": 1.7777777777777777,
454
+ "eval_loss": 0.21721433103084564,
455
+ "eval_runtime": 48.5394,
456
+ "eval_samples_per_second": 20.602,
457
+ "eval_steps_per_second": 0.659,
458
+ "eval_sts-dev_pearson_cosine": 0.8221879391828698,
459
+ "eval_sts-dev_spearman_cosine": 0.8276168454271825,
460
+ "step": 50000
461
+ },
462
+ {
463
+ "epoch": 1.8133333333333335,
464
+ "grad_norm": 7.367649078369141,
465
+ "learning_rate": 2.1976111776867667e-05,
466
+ "loss": 0.1213,
467
+ "step": 51000
468
+ },
469
+ {
470
+ "epoch": 1.8488888888888888,
471
+ "grad_norm": 3.9367151260375977,
472
+ "learning_rate": 2.1317671227464872e-05,
473
+ "loss": 0.1134,
474
+ "step": 52000
475
+ },
476
+ {
477
+ "epoch": 1.8844444444444446,
478
+ "grad_norm": 0.3449944853782654,
479
+ "learning_rate": 2.0659230678062077e-05,
480
+ "loss": 0.109,
481
+ "step": 53000
482
+ },
483
+ {
484
+ "epoch": 1.92,
485
+ "grad_norm": 1.9318583011627197,
486
+ "learning_rate": 2.0000790128659283e-05,
487
+ "loss": 0.1158,
488
+ "step": 54000
489
+ },
490
+ {
491
+ "epoch": 1.9555555555555557,
492
+ "grad_norm": 2.4510562419891357,
493
+ "learning_rate": 1.9342349579256488e-05,
494
+ "loss": 0.1073,
495
+ "step": 55000
496
+ },
497
+ {
498
+ "epoch": 1.9555555555555557,
499
+ "eval_loss": 0.22918041050434113,
500
+ "eval_runtime": 48.7814,
501
+ "eval_samples_per_second": 20.5,
502
+ "eval_steps_per_second": 0.656,
503
+ "eval_sts-dev_pearson_cosine": 0.8226444171661187,
504
+ "eval_sts-dev_spearman_cosine": 0.832783411507352,
505
+ "step": 55000
506
+ },
507
+ {
508
+ "epoch": 1.991111111111111,
509
+ "grad_norm": 8.485190391540527,
510
+ "learning_rate": 1.8683909029853697e-05,
511
+ "loss": 0.1053,
512
+ "step": 56000
513
+ },
514
+ {
515
+ "epoch": 2.026666666666667,
516
+ "grad_norm": 0.6446801424026489,
517
+ "learning_rate": 1.8025468480450902e-05,
518
+ "loss": 0.0643,
519
+ "step": 57000
520
+ },
521
+ {
522
+ "epoch": 2.062222222222222,
523
+ "grad_norm": 9.16294002532959,
524
+ "learning_rate": 1.7367027931048108e-05,
525
+ "loss": 0.0546,
526
+ "step": 58000
527
+ },
528
+ {
529
+ "epoch": 2.097777777777778,
530
+ "grad_norm": 4.509792327880859,
531
+ "learning_rate": 1.6708587381645313e-05,
532
+ "loss": 0.054,
533
+ "step": 59000
534
+ },
535
+ {
536
+ "epoch": 2.1333333333333333,
537
+ "grad_norm": 0.07099995762109756,
538
+ "learning_rate": 1.605014683224252e-05,
539
+ "loss": 0.0535,
540
+ "step": 60000
541
+ },
542
+ {
543
+ "epoch": 2.1333333333333333,
544
+ "eval_loss": 0.279130220413208,
545
+ "eval_runtime": 55.8907,
546
+ "eval_samples_per_second": 17.892,
547
+ "eval_steps_per_second": 0.573,
548
+ "eval_sts-dev_pearson_cosine": 0.8213308892826775,
549
+ "eval_sts-dev_spearman_cosine": 0.8271747105077873,
550
+ "step": 60000
551
+ },
552
+ {
553
+ "epoch": 2.168888888888889,
554
+ "grad_norm": 1.7394822835922241,
555
+ "learning_rate": 1.5391706282839724e-05,
556
+ "loss": 0.0512,
557
+ "step": 61000
558
+ },
559
+ {
560
+ "epoch": 2.2044444444444444,
561
+ "grad_norm": 7.789730548858643,
562
+ "learning_rate": 1.473326573343693e-05,
563
+ "loss": 0.0546,
564
+ "step": 62000
565
+ },
566
+ {
567
+ "epoch": 2.24,
568
+ "grad_norm": 10.078393936157227,
569
+ "learning_rate": 1.4074825184034135e-05,
570
+ "loss": 0.0539,
571
+ "step": 63000
572
+ },
573
+ {
574
+ "epoch": 2.2755555555555556,
575
+ "grad_norm": 2.606584310531616,
576
+ "learning_rate": 1.341638463463134e-05,
577
+ "loss": 0.0561,
578
+ "step": 64000
579
+ },
580
+ {
581
+ "epoch": 2.311111111111111,
582
+ "grad_norm": 4.760853290557861,
583
+ "learning_rate": 1.2757944085228546e-05,
584
+ "loss": 0.0478,
585
+ "step": 65000
586
+ },
587
+ {
588
+ "epoch": 2.311111111111111,
589
+ "eval_loss": 0.256197065114975,
590
+ "eval_runtime": 55.5249,
591
+ "eval_samples_per_second": 18.01,
592
+ "eval_steps_per_second": 0.576,
593
+ "eval_sts-dev_pearson_cosine": 0.8246413401396585,
594
+ "eval_sts-dev_spearman_cosine": 0.8288094370870545,
595
+ "step": 65000
596
+ },
597
+ {
598
+ "epoch": 2.3466666666666667,
599
+ "grad_norm": 1.3447166681289673,
600
+ "learning_rate": 1.2099503535825751e-05,
601
+ "loss": 0.0555,
602
+ "step": 66000
603
+ },
604
+ {
605
+ "epoch": 2.3822222222222225,
606
+ "grad_norm": 2.37924861907959,
607
+ "learning_rate": 1.1441062986422956e-05,
608
+ "loss": 0.0503,
609
+ "step": 67000
610
+ },
611
+ {
612
+ "epoch": 2.417777777777778,
613
+ "grad_norm": 1.2358015775680542,
614
+ "learning_rate": 1.0782622437020162e-05,
615
+ "loss": 0.0449,
616
+ "step": 68000
617
+ },
618
+ {
619
+ "epoch": 2.453333333333333,
620
+ "grad_norm": 0.22912859916687012,
621
+ "learning_rate": 1.0124181887617367e-05,
622
+ "loss": 0.0482,
623
+ "step": 69000
624
+ },
625
+ {
626
+ "epoch": 2.488888888888889,
627
+ "grad_norm": 9.296670913696289,
628
+ "learning_rate": 9.465741338214573e-06,
629
+ "loss": 0.0493,
630
+ "step": 70000
631
+ },
632
+ {
633
+ "epoch": 2.488888888888889,
634
+ "eval_loss": 0.26088747382164,
635
+ "eval_runtime": 55.5071,
636
+ "eval_samples_per_second": 18.016,
637
+ "eval_steps_per_second": 0.577,
638
+ "eval_sts-dev_pearson_cosine": 0.8255009970943029,
639
+ "eval_sts-dev_spearman_cosine": 0.831174307235804,
640
+ "step": 70000
641
+ },
642
+ {
643
+ "epoch": 2.5244444444444447,
644
+ "grad_norm": 1.4696452617645264,
645
+ "learning_rate": 8.807300788811778e-06,
646
+ "loss": 0.0486,
647
+ "step": 71000
648
+ },
649
+ {
650
+ "epoch": 2.56,
651
+ "grad_norm": 0.014002230018377304,
652
+ "learning_rate": 8.148860239408983e-06,
653
+ "loss": 0.0483,
654
+ "step": 72000
655
+ },
656
+ {
657
+ "epoch": 2.5955555555555554,
658
+ "grad_norm": 10.814248085021973,
659
+ "learning_rate": 7.49041969000619e-06,
660
+ "loss": 0.0444,
661
+ "step": 73000
662
+ },
663
+ {
664
+ "epoch": 2.631111111111111,
665
+ "grad_norm": 0.9171813726425171,
666
+ "learning_rate": 6.831979140603395e-06,
667
+ "loss": 0.0421,
668
+ "step": 74000
669
+ },
670
+ {
671
+ "epoch": 2.6666666666666665,
672
+ "grad_norm": 0.6967930197715759,
673
+ "learning_rate": 6.1735385912006005e-06,
674
+ "loss": 0.042,
675
+ "step": 75000
676
+ },
677
+ {
678
+ "epoch": 2.6666666666666665,
679
+ "eval_loss": 0.25533053278923035,
680
+ "eval_runtime": 55.7954,
681
+ "eval_samples_per_second": 17.923,
682
+ "eval_steps_per_second": 0.574,
683
+ "eval_sts-dev_pearson_cosine": 0.8229336010617173,
684
+ "eval_sts-dev_spearman_cosine": 0.8301305406762217,
685
+ "step": 75000
686
+ },
687
+ {
688
+ "epoch": 2.7022222222222223,
689
+ "grad_norm": 5.206503391265869,
690
+ "learning_rate": 5.515098041797807e-06,
691
+ "loss": 0.0409,
692
+ "step": 76000
693
+ },
694
+ {
695
+ "epoch": 2.7377777777777776,
696
+ "grad_norm": 8.96776008605957,
697
+ "learning_rate": 4.856657492395012e-06,
698
+ "loss": 0.0456,
699
+ "step": 77000
700
+ },
701
+ {
702
+ "epoch": 2.7733333333333334,
703
+ "grad_norm": 1.4853557348251343,
704
+ "learning_rate": 4.1982169429922175e-06,
705
+ "loss": 0.0411,
706
+ "step": 78000
707
+ },
708
+ {
709
+ "epoch": 2.8088888888888888,
710
+ "grad_norm": 0.08554021269083023,
711
+ "learning_rate": 3.539776393589423e-06,
712
+ "loss": 0.0416,
713
+ "step": 79000
714
+ },
715
+ {
716
+ "epoch": 2.8444444444444446,
717
+ "grad_norm": 0.010798577219247818,
718
+ "learning_rate": 2.8813358441866287e-06,
719
+ "loss": 0.0392,
720
+ "step": 80000
721
+ },
722
+ {
723
+ "epoch": 2.8444444444444446,
724
+ "eval_loss": 0.24324475228786469,
725
+ "eval_runtime": 55.6948,
726
+ "eval_samples_per_second": 17.955,
727
+ "eval_steps_per_second": 0.575,
728
+ "eval_sts-dev_pearson_cosine": 0.8258729995042317,
729
+ "eval_sts-dev_spearman_cosine": 0.8310916968189412,
730
+ "step": 80000
731
+ },
732
+ {
733
+ "epoch": 2.88,
734
+ "grad_norm": 1.3684927225112915,
735
+ "learning_rate": 2.222895294783834e-06,
736
+ "loss": 0.0365,
737
+ "step": 81000
738
+ },
739
+ {
740
+ "epoch": 2.9155555555555557,
741
+ "grad_norm": 21.493656158447266,
742
+ "learning_rate": 1.5644547453810395e-06,
743
+ "loss": 0.042,
744
+ "step": 82000
745
+ },
746
+ {
747
+ "epoch": 2.951111111111111,
748
+ "grad_norm": 7.1114654541015625,
749
+ "learning_rate": 9.06014195978245e-07,
750
+ "loss": 0.0422,
751
+ "step": 83000
752
+ },
753
+ {
754
+ "epoch": 2.986666666666667,
755
+ "grad_norm": 0.7839590907096863,
756
+ "learning_rate": 2.475736465754507e-07,
757
+ "loss": 0.0343,
758
+ "step": 84000
759
+ }
760
+ ],
761
+ "logging_steps": 1000,
762
+ "max_steps": 84375,
763
+ "num_input_tokens_seen": 0,
764
+ "num_train_epochs": 3,
765
+ "save_steps": 5000,
766
+ "stateful_callbacks": {
767
+ "TrainerControl": {
768
+ "args": {
769
+ "should_epoch_stop": false,
770
+ "should_evaluate": false,
771
+ "should_log": false,
772
+ "should_save": true,
773
+ "should_training_stop": true
774
+ },
775
+ "attributes": {}
776
+ }
777
+ },
778
+ "total_flos": 0.0,
779
+ "train_batch_size": 32,
780
+ "trial_name": null,
781
+ "trial_params": null
782
+ }
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,548 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:900000
9
+ - loss:GISTEmbedLoss
10
+ base_model: jhu-clsp/mmBERT-base
11
+ widget:
12
+ - source_sentence: ممكن تخبرني ارتفاع برج الخليفة
13
+ sentences:
14
+ - الزيادات في سكان الجبل الأسود. أنهى الجبل الأسود عام 2014 عدد سكانه 621800 نسمة
15
+ ، وهو ما يمثل زيادة قدرها 279 شخصًا مقارنة بعام 2013.
16
+ - يبلغ ارتفاعه الإجمالي 829.8 مترًا (2،722 قدمًا) وارتفاع السقف (باستثناء الهوائي)
17
+ 828 مترًا (2،717 قدمًا) ، وكان برج خليفة أطول مبنى في العالم منذ ظهوره في أواخر
18
+ عام 2008. برج خليفة
19
+ - في 4 يناير 2010 ، تحولت أنظار العالم إلى برج خليفة حيث افتتح صاحب السمو الشيخ
20
+ محمد بن راشد آل مكتوم ، نائب رئيس الدولة رئيس مجلس الوزراء حاكم دبي ، أطول برج
21
+ في العالم. تلا ذلك عرض ضوئي وصوتي مذهل وتم بثه في جميع أنحاء العالم. يشارك.
22
+ - source_sentence: فوائد الشاي الأخضر المثلج
23
+ sentences:
24
+ - فوائد الشاي الأخضر المثلج. تم استخدام الشاي الأخضر المثلج لسنوات لعلاج الأمراض
25
+ المختلفة وكان يستخدم بشكل شائع من قبل الشعب الصيني القديم. في الصين القديمة ،
26
+ كان الشاي الأخضر المثلج يستخدم لعلاج الاكتئاب والصداع. على عكس الشاي المعتاد ،
27
+ فهو يحتوي على نسبة منخفضة من الكافيين مما يؤدي غالبًا إلى الغثيان وكثرة التبول
28
+ والأرق.
29
+ - نعم ، يمكنك أن تصبح أطول بوصتين في الفضاء ؛ لكنه سيعود إلى الارتفاع الطبيعي عند
30
+ العودة إلى الأرض ، حيث تضغط الجاذبية على الغضروف اللين في عمودك الفقري. الذهاب
31
+ تسجيل الدخول التسجيل
32
+ - 'إذا كنت ترغب في قراءة المزيد عن الفوائد الصحية العديدة الرائعة للشاي الأخضر ،
33
+ فقم بإلقاء نظرة على هذا المقال: أفضل 10 فوائد صحية مستندة إلى الأدلة للشاي الأخضر.
34
+ مقال عن التغذية قائم على الأدلة من خبرائنا في هيئة التغذية.'
35
+ - source_sentence: تحديد التمايز
36
+ sentences:
37
+ - (انظر أيضا النيازك). معظم المعادن الشائعة المكونة للصخور عبارة عن مجموعات من أكثر
38
+ المواد وفرة في الأرض ، مع الأخذ في الاعتبار تمايز الأرض إلى طبقات أخف وأكثر كثافة
39
+ في وقت مبكر من تاريخها ، انظر أيضًا النيازك). معظم المعادن الشائعة المكونة للصخور
40
+ عبارة عن مجموعات من أكثر المواد وفرة في الأرض ، مع مراعاة تمايز الأرض إلى طبقات
41
+ أخف وأكثر كثافة في وقت مبكر من تاريخها.
42
+ - التفاضل. ن. 1. فعل أو عملية أو نتيجة التفرقة. 2. (الرياضيات) الرياضيات عملية مستخدمة
43
+ في حساب التفاضل والتكامل يتم فيها تحديد مشتق دالة أو متغير ؛ عكس التكامل. انظر
44
+ التكامل 6.
45
+ - 'يبقى الدجاج المطبوخ جيدًا لمدة 3 إلى 4 أيام في الثلاجة و 4 أشهر في الفريزر. كيف
46
+ تتحقق مما إذا كان الدجاج النيء سيئًا؟ أفضل طريقة هي شم وإلقاء نظرة على الدجاج:
47
+ علامات الدجاج الفاسد هي الرائحة الحامضة واللون الباهت والملمس اللزج. تخلصي من
48
+ أي دجاجة ذات مظهر أو رائحة كريهة.'
49
+ - source_sentence: هل يمكنك استخدام الخل لإزالة الشمع من أذنك
50
+ sentences:
51
+ - فوائد الخل الأبيض في الأذنين. قد يعمل الخل على فك شمع الأذن المتصلب. إذا شعرت
52
+ بعدم الراحة أو الحكة أو الألم في أذنك ، فقد لا تكون حالة خطيرة. في بعض الحالات
53
+ ، قد يكون سبب عدم الراحة في الأذن هو انسداد الأذن بالشمع ، أو دخول صابون أو شامبو
54
+ في الأذن ، أو تراكم الماء من السباحة أو الاستحمام.
55
+ - الديمقراطية هي ضد 300 ، وواحدة من أفضل الروايات المصورة لهذا العام. في اليونان
56
+ القديمة ، يحاول جيش أثيني صغير النوم في الليلة السابقة لمعركة ضد الأمير زركسيس
57
+ وجحافله الفارسية الضخمة. إنهم غير قادرين على النوم ، لذلك يبدأ أحد الجنود في سرد
58
+ ​​قصة - قصة شجاعة ومعركة وثمن الحرية.
59
+ - 'إذا كنت غير قادر على إزالة الشمع أو إذا أصبحت أذنك أكثر تهيجًا ، فاطلب العلاج
60
+ الطبي. قد تسبب حالات أخرى أعراض تراكم شمع الأذن. من المهم أن يتمكن طبيبك من استبعاد
61
+ هؤلاء. يمكنهم استخدام منظار الأذن ، وهو أداة مضاءة بمكبر ، لرؤية بوضوح في أذنك
62
+ الداخلية. لإزالة تراكم الشمع ، قد يستخدم طبيبك:'
63
+ - source_sentence: متوسط ​​درجة الحرارة في أورلاندو فلوريدا
64
+ sentences:
65
+ - يستخدم سيلوستازول لعلاج أعراض العرج المتقطع. تتسبب هذه الحالة في انخفاض تدفق الدم
66
+ إلى الساقين ، مما يؤدي إلى الشعور بالألم أثناء المشي. يحسن سيلوستازول قدرتك على
67
+ المشي لمسافات أطول دون ألم. يمكن أيضًا استخدام سيلوستازول لأغراض غير مدرجة في
68
+ دليل الدواء هذا.
69
+ - طقس جنوب فلوريدا. تتمتع جنوب فلوريدا بمتوسط ​​درجة حرارة سنوي يبلغ 82 درجة فهرنهايت
70
+ ، وتتراوح من متوسط ​​منخفض يبلغ 75 درجة فهرنهايت في الشتاء إلى 89 درجة فهرنهايت
71
+ في الصيف ، مما يجعلها مكانًا مثاليًا إجازة والاستمتاع بأفضل سلعنا! جنوب فلوريدا
72
+ أيضًا رطب للغاية ، مما يجعل الهواء والحرارة أكثر سخونة. على سبيل المثال ، درجة
73
+ حرارة 80 درجة فهرنهايت مع رطوبة 85٪ ، تبدو في الواقع وكأنها 99 درجة فهرنهايت.
74
+ في بعض الأيام تشعر وكأنك تستطيع قطع الهواء بسكين! هذا هو سبب أهمية تكييف الهواء
75
+ بالنسبة لنا.
76
+ - تقع أورلاندو في وسط فلوريدا ، ويبلغ متوسط ​​درجة الحرارة الإجمالية فيها 83 درجة
77
+ مئوية ومتوسط ​​منخفض يبلغ 62 درجة مئوية. في المتوسط ​​الأكثر دفئًا في جاكسونفيل
78
+ هو يوليو ويناير هو متوسط ​​أبرد الشهور.
79
+ datasets:
80
+ - akhooli/arabic-triplets-1m-curated-sims-len
81
+ pipeline_tag: sentence-similarity
82
+ library_name: sentence-transformers
83
+ metrics:
84
+ - pearson_cosine
85
+ - spearman_cosine
86
+ model-index:
87
+ - name: SentenceTransformer based on jhu-clsp/mmBERT-base
88
+ results:
89
+ - task:
90
+ type: semantic-similarity
91
+ name: Semantic Similarity
92
+ dataset:
93
+ name: sts dev
94
+ type: sts-dev
95
+ metrics:
96
+ - type: pearson_cosine
97
+ value: 0.8258729995042317
98
+ name: Pearson Cosine
99
+ - type: spearman_cosine
100
+ value: 0.8310916968189412
101
+ name: Spearman Cosine
102
+ ---
103
+
104
+ # SentenceTransformer based on jhu-clsp/mmBERT-base
105
+
106
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [jhu-clsp/mmBERT-base](https://huggingface.co/jhu-clsp/mmBERT-base) on the [arabic-triplets-1m-curated-sims-len](https://huggingface.co/datasets/akhooli/arabic-triplets-1m-curated-sims-len) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
107
+
108
+ ## Model Details
109
+
110
+ ### Model Description
111
+ - **Model Type:** Sentence Transformer
112
+ - **Base model:** [jhu-clsp/mmBERT-base](https://huggingface.co/jhu-clsp/mmBERT-base) <!-- at revision 212719a285585190121d7255ab5da22e97818e85 -->
113
+ - **Maximum Sequence Length:** 8192 tokens
114
+ - **Output Dimensionality:** 768 dimensions
115
+ - **Similarity Function:** Cosine Similarity
116
+ - **Training Dataset:**
117
+ - [arabic-triplets-1m-curated-sims-len](https://huggingface.co/datasets/akhooli/arabic-triplets-1m-curated-sims-len)
118
+ <!-- - **Language:** Unknown -->
119
+ <!-- - **License:** Unknown -->
120
+
121
+ ### Model Sources
122
+
123
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
124
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
125
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
126
+
127
+ ### Full Model Architecture
128
+
129
+ ```
130
+ SentenceTransformer(
131
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
132
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
133
+ )
134
+ ```
135
+
136
+ ## Usage
137
+
138
+ ### Direct Usage (Sentence Transformers)
139
+
140
+ First install the Sentence Transformers library:
141
+
142
+ ```bash
143
+ pip install -U sentence-transformers
144
+ ```
145
+
146
+ Then you can load this model and run inference.
147
+ ```python
148
+ from sentence_transformers import SentenceTransformer
149
+
150
+ # Download from the 🤗 Hub
151
+ model = SentenceTransformer("sentence_transformers_model_id")
152
+ # Run inference
153
+ sentences = [
154
+ 'متوسط \u200b\u200bدرجة الحرارة في أورلاندو فلوريدا',
155
+ 'تقع أورلاندو في وسط فلوريدا ، ويبلغ متوسط \u200b\u200bدرجة الحرارة الإجمالية فيها 83 درجة مئوية ومتوسط \u200b\u200bمنخفض يبلغ 62 درجة مئوية. في المتوسط \u200b\u200bالأكثر دفئًا في جاكسونفيل هو يوليو ويناير هو متوسط \u200b\u200bأبرد الشهور.',
156
+ 'طقس جنوب فلوريدا. تتمتع جنوب فلوريدا بمتوسط \u200b\u200bدرجة حرارة سنوي يبلغ 82 درجة فهرنهايت ، وتتراوح من متوسط \u200b\u200bمنخفض يبلغ 75 درجة فهرنهايت في الشتاء إلى 89 درجة فهرنهايت في الصيف ، مما يجعلها مكانًا مثاليًا إجازة والاستمتاع بأفضل سلعنا! جنوب فلوريدا أيضًا رطب للغاية ، مما يجعل الهواء والحرارة أكثر سخونة. على سبيل المثال ، درجة حرارة 80 درجة فهرنهايت مع رطوبة 85٪ ، تبدو في الواقع وكأنها 99 درجة فهرنهايت. في بعض الأيام تشعر وكأنك تستطيع قطع الهواء بسكين! هذا هو سبب أهمية تكييف الهواء بالنسبة لنا.',
157
+ ]
158
+ embeddings = model.encode(sentences)
159
+ print(embeddings.shape)
160
+ # (3, 768)
161
+
162
+ # Get the similarity scores for the embeddings
163
+ similarities = model.similarity(embeddings, embeddings)
164
+ print(similarities)
165
+ # tensor([[1.0000, 0.8495, 0.7115],
166
+ # [0.8495, 1.0000, 0.7436],
167
+ # [0.7115, 0.7436, 1.0000]])
168
+ ```
169
+
170
+ <!--
171
+ ### Direct Usage (Transformers)
172
+
173
+ <details><summary>Click to see the direct usage in Transformers</summary>
174
+
175
+ </details>
176
+ -->
177
+
178
+ <!--
179
+ ### Downstream Usage (Sentence Transformers)
180
+
181
+ You can finetune this model on your own dataset.
182
+
183
+ <details><summary>Click to expand</summary>
184
+
185
+ </details>
186
+ -->
187
+
188
+ <!--
189
+ ### Out-of-Scope Use
190
+
191
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
192
+ -->
193
+
194
+ ## Evaluation
195
+
196
+ ### Metrics
197
+
198
+ #### Semantic Similarity
199
+
200
+ * Dataset: `sts-dev`
201
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
202
+
203
+ | Metric | Value |
204
+ |:--------------------|:-----------|
205
+ | pearson_cosine | 0.8259 |
206
+ | **spearman_cosine** | **0.8311** |
207
+
208
+ <!--
209
+ ## Bias, Risks and Limitations
210
+
211
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
212
+ -->
213
+
214
+ <!--
215
+ ### Recommendations
216
+
217
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
218
+ -->
219
+
220
+ ## Training Details
221
+
222
+ ### Training Dataset
223
+
224
+ #### arabic-triplets-1m-curated-sims-len
225
+
226
+ * Dataset: [arabic-triplets-1m-curated-sims-len](https://huggingface.co/datasets/akhooli/arabic-triplets-1m-curated-sims-len) at [7c87e57](https://huggingface.co/datasets/akhooli/arabic-triplets-1m-curated-sims-len/tree/7c87e5716c557ed659518c0d7c66770166b7e54b)
227
+ * Size: 900,000 training samples
228
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>sentence3</code>
229
+ * Approximate statistics based on the first 1000 samples:
230
+ | | sentence1 | sentence2 | sentence3 |
231
+ |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
232
+ | type | string | string | string |
233
+ | details | <ul><li>min: 4 tokens</li><li>mean: 22.01 tokens</li><li>max: 80 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.04 tokens</li><li>max: 109 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 15.2 tokens</li><li>max: 48 tokens</li></ul> |
234
+ * Samples:
235
+ | sentence1 | sentence2 | sentence3 |
236
+ |:--------------------------------|:--------------------------------------------------------|:----------------------------------------------------------------------|
237
+ | <code>هناك رجل في الشارع</code> | <code>رجل يحمل مالاً يقف أمام فرقة موسيقية ومتجر</code> | <code>رجلين و صبي صغير في سترة أرجوانية يمسكون منشورات ترويجية</code> |
238
+ | <code>الكلب يلعب بالجلب.</code> | <code>هناك كلب سمراء في منتصف الحقل يجلب كرة تنس</code> | <code>هناك كلب على العشب يهز نفسه حتى يجف.</code> |
239
+ | <code>شخصان يسيران.</code> | <code>شخصان يضحكان</code> | <code>رجل وامرأة يركبان دراجة مزدوجة معاً</code> |
240
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
241
+ ```json
242
+ {
243
+ "guide": "SentenceTransformer('Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2')",
244
+ "temperature": 0.01,
245
+ "margin_strategy": "absolute",
246
+ "margin": 0.0,
247
+ "contrast_anchors": true,
248
+ "contrast_positives": true,
249
+ "gather_across_devices": false
250
+ }
251
+ ```
252
+
253
+ ### Evaluation Dataset
254
+
255
+ #### arabic-triplets-1m-curated-sims-len
256
+
257
+ * Dataset: [arabic-triplets-1m-curated-sims-len](https://huggingface.co/datasets/akhooli/arabic-triplets-1m-curated-sims-len) at [7c87e57](https://huggingface.co/datasets/akhooli/arabic-triplets-1m-curated-sims-len/tree/7c87e5716c557ed659518c0d7c66770166b7e54b)
258
+ * Size: 1,000 evaluation samples
259
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>sentence3</code>
260
+ * Approximate statistics based on the first 1000 samples:
261
+ | | sentence1 | sentence2 | sentence3 |
262
+ |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
263
+ | type | string | string | string |
264
+ | details | <ul><li>min: 4 tokens</li><li>mean: 12.01 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 22 tokens</li><li>mean: 109.31 tokens</li><li>max: 311 tokens</li></ul> | <ul><li>min: 28 tokens</li><li>mean: 103.89 tokens</li><li>max: 289 tokens</li></ul> |
265
+ * Samples:
266
+ | sentence1 | sentence2 | sentence3 |
267
+ |:--------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
268
+ | <code>ما هي الجيولوجيا للأطفال</code> | <code>1 العلوم والأخبار والموارد للأطفال - EurekAlert! بوابة العلوم للأطفال هي مصدر مركزي عبر الإنترنت للأخبار والموارد الملائمة للأطفال. جيولوجيا الولايات المتحدة - الجيولوجيا هي دراسة الأرض.</code> | <code>يدرس علماء المحيطات كل جانب من جوانب المحيط ، مثل كيمياء مياه المحيط ، والجيولوجيا المرتبطة بالمحيطات ، والحركات الفيزيائية لمياه المحيط ، أو حتى الحياة التي تعتبر المحيط موطنه.</code> |
269
+ | <code>تعريف القياس الحيوي</code> | <code>تعريف القياس الحيوي. : الكشف عن بعد وقياس وظيفة أو نشاط أو حالة بشرية أو حيوانية (مثل معدل ضربات القلب أو درجة حرارة الجسم)</code> | <code>هذا هو تعريف الإنزيم المساعد وشرح الفرق بين الإنزيمات المساعدة والعوامل المساعدة والمجموعات الاصطناعية. يتم توفير أمثلة. هذا هو تعريف الإنزيم المساعد وشرح الفرق بين الإنزيمات المساعدة والعوامل المساعدة والمجموعات الاصطناعية. يتم توفير أمثلة. تعريف الإنزيم المساعد وأمثلة</code> |
270
+ | <code>تعريف الصدارة</code> | <code>¢ هو الموقف الأكثر بروزًا أو أهمية: كانت واحدة من السياسيين في / في طليعة حملة إطلاق سراح السجناء. فريقه في طليعة البحث العلمي في اللقاحات. قاموس المرادفات: المرادفات والكلمات ذات الصلة. وصف المكانة الرائدة.</code> | <code>ولكن ، نظرًا لأن التعقيدات الموجودة في مجتمع اليوم تطرح قضايا مراقبة الأصول أو الحماية أو تخفيض الضرائب في مقدمة التخطيط ، فإن استخدام الصناديق الرسمية يتزايد. الصناديق الاستئمانية عبارة عن هياكل يتم من خلالها إدارة الممتلكات أو الأصول وإدارتها لصالح المستفيدين من الثقة.</code> |
271
+ * Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
272
+ ```json
273
+ {
274
+ "guide": "SentenceTransformer('Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2')",
275
+ "temperature": 0.01,
276
+ "margin_strategy": "absolute",
277
+ "margin": 0.0,
278
+ "contrast_anchors": true,
279
+ "contrast_positives": true,
280
+ "gather_across_devices": false
281
+ }
282
+ ```
283
+
284
+ ### Training Hyperparameters
285
+ #### Non-Default Hyperparameters
286
+
287
+ - `eval_strategy`: steps
288
+ - `per_device_train_batch_size`: 32
289
+ - `per_device_eval_batch_size`: 32
290
+ - `warmup_ratio`: 0.1
291
+ - `fp16`: True
292
+ - `batch_sampler`: no_duplicates
293
+
294
+ #### All Hyperparameters
295
+ <details><summary>Click to expand</summary>
296
+
297
+ - `overwrite_output_dir`: False
298
+ - `do_predict`: False
299
+ - `eval_strategy`: steps
300
+ - `prediction_loss_only`: True
301
+ - `per_device_train_batch_size`: 32
302
+ - `per_device_eval_batch_size`: 32
303
+ - `gradient_accumulation_steps`: 1
304
+ - `eval_accumulation_steps`: None
305
+ - `torch_empty_cache_steps`: None
306
+ - `learning_rate`: 5e-05
307
+ - `weight_decay`: 0.0
308
+ - `adam_beta1`: 0.9
309
+ - `adam_beta2`: 0.999
310
+ - `adam_epsilon`: 1e-08
311
+ - `max_grad_norm`: 1.0
312
+ - `num_train_epochs`: 3
313
+ - `max_steps`: -1
314
+ - `lr_scheduler_type`: linear
315
+ - `lr_scheduler_kwargs`: None
316
+ - `warmup_ratio`: 0.1
317
+ - `warmup_steps`: 0
318
+ - `log_level`: passive
319
+ - `log_level_replica`: warning
320
+ - `log_on_each_node`: True
321
+ - `logging_nan_inf_filter`: True
322
+ - `save_safetensors`: True
323
+ - `save_on_each_node`: False
324
+ - `save_only_model`: False
325
+ - `restore_callback_states_from_checkpoint`: False
326
+ - `use_cpu`: False
327
+ - `seed`: 42
328
+ - `data_seed`: None
329
+ - `jit_mode_eval`: False
330
+ - `bf16`: False
331
+ - `fp16`: True
332
+ - `half_precision_backend`: None
333
+ - `bf16_full_eval`: False
334
+ - `fp16_full_eval`: False
335
+ - `tf32`: None
336
+ - `local_rank`: 0
337
+ - `ddp_backend`: None
338
+ - `tpu_num_cores`: None
339
+ - `debug`: []
340
+ - `dataloader_drop_last`: False
341
+ - `dataloader_num_workers`: 0
342
+ - `dataloader_prefetch_factor`: None
343
+ - `past_index`: -1
344
+ - `disable_tqdm`: False
345
+ - `remove_unused_columns`: True
346
+ - `label_names`: None
347
+ - `load_best_model_at_end`: False
348
+ - `ignore_data_skip`: False
349
+ - `fsdp`: []
350
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
351
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
352
+ - `parallelism_config`: None
353
+ - `deepspeed`: None
354
+ - `label_smoothing_factor`: 0.0
355
+ - `optim`: adamw_torch_fused
356
+ - `optim_args`: None
357
+ - `group_by_length`: False
358
+ - `length_column_name`: length
359
+ - `project`: huggingface
360
+ - `trackio_space_id`: trackio
361
+ - `ddp_find_unused_parameters`: None
362
+ - `ddp_bucket_cap_mb`: None
363
+ - `ddp_broadcast_buffers`: False
364
+ - `dataloader_pin_memory`: True
365
+ - `dataloader_persistent_workers`: False
366
+ - `skip_memory_metrics`: True
367
+ - `push_to_hub`: False
368
+ - `resume_from_checkpoint`: None
369
+ - `hub_model_id`: None
370
+ - `hub_strategy`: every_save
371
+ - `hub_private_repo`: None
372
+ - `hub_always_push`: False
373
+ - `hub_revision`: None
374
+ - `gradient_checkpointing`: False
375
+ - `gradient_checkpointing_kwargs`: None
376
+ - `include_for_metrics`: []
377
+ - `eval_do_concat_batches`: True
378
+ - `mp_parameters`:
379
+ - `auto_find_batch_size`: False
380
+ - `full_determinism`: False
381
+ - `ray_scope`: last
382
+ - `ddp_timeout`: 1800
383
+ - `torch_compile`: False
384
+ - `torch_compile_backend`: None
385
+ - `torch_compile_mode`: None
386
+ - `include_tokens_per_second`: None
387
+ - `include_num_input_tokens_seen`: no
388
+ - `neftune_noise_alpha`: None
389
+ - `optim_target_modules`: None
390
+ - `batch_eval_metrics`: False
391
+ - `eval_on_start`: False
392
+ - `use_liger_kernel`: False
393
+ - `liger_kernel_config`: None
394
+ - `eval_use_gather_object`: False
395
+ - `average_tokens_across_devices`: True
396
+ - `prompts`: None
397
+ - `batch_sampler`: no_duplicates
398
+ - `multi_dataset_batch_sampler`: proportional
399
+ - `router_mapping`: {}
400
+ - `learning_rate_mapping`: {}
401
+
402
+ </details>
403
+
404
+ ### Training Logs
405
+ | Epoch | Step | Training Loss | Validation Loss | sts-dev_spearman_cosine |
406
+ |:------:|:-----:|:-------------:|:---------------:|:-----------------------:|
407
+ | -1 | -1 | - | - | 0.6723 |
408
+ | 0.0356 | 1000 | 1.6025 | - | - |
409
+ | 0.0711 | 2000 | 0.5208 | - | - |
410
+ | 0.1067 | 3000 | 0.4374 | - | - |
411
+ | 0.1422 | 4000 | 0.4142 | - | - |
412
+ | 0.1778 | 5000 | 0.3916 | 0.3775 | 0.8196 |
413
+ | 0.2133 | 6000 | 0.382 | - | - |
414
+ | 0.2489 | 7000 | 0.3824 | - | - |
415
+ | 0.2844 | 8000 | 0.3744 | - | - |
416
+ | 0.32 | 9000 | 0.3781 | - | - |
417
+ | 0.3556 | 10000 | 0.367 | 0.3758 | 0.8136 |
418
+ | 0.3911 | 11000 | 0.3527 | - | - |
419
+ | 0.4267 | 12000 | 0.3354 | - | - |
420
+ | 0.4622 | 13000 | 0.3147 | - | - |
421
+ | 0.4978 | 14000 | 0.3084 | - | - |
422
+ | 0.5333 | 15000 | 0.2975 | 0.3515 | 0.8205 |
423
+ | 0.5689 | 16000 | 0.2978 | - | - |
424
+ | 0.6044 | 17000 | 0.2916 | - | - |
425
+ | 0.64 | 18000 | 0.2792 | - | - |
426
+ | 0.6756 | 19000 | 0.2661 | - | - |
427
+ | 0.7111 | 20000 | 0.2583 | 0.3185 | 0.8159 |
428
+ | 0.7467 | 21000 | 0.2529 | - | - |
429
+ | 0.7822 | 22000 | 0.2505 | - | - |
430
+ | 0.8178 | 23000 | 0.2454 | - | - |
431
+ | 0.8533 | 24000 | 0.242 | - | - |
432
+ | 0.8889 | 25000 | 0.2307 | 0.2882 | 0.8175 |
433
+ | 0.9244 | 26000 | 0.2349 | - | - |
434
+ | 0.96 | 27000 | 0.2238 | - | - |
435
+ | 0.9956 | 28000 | 0.2132 | - | - |
436
+ | 1.0311 | 29000 | 0.1601 | - | - |
437
+ | 1.0667 | 30000 | 0.1581 | 0.2581 | 0.8222 |
438
+ | 1.1022 | 31000 | 0.1532 | - | - |
439
+ | 1.1378 | 32000 | 0.1494 | - | - |
440
+ | 1.1733 | 33000 | 0.1484 | - | - |
441
+ | 1.2089 | 34000 | 0.1529 | - | - |
442
+ | 1.2444 | 35000 | 0.1467 | 0.2444 | 0.8211 |
443
+ | 1.28 | 36000 | 0.1522 | - | - |
444
+ | 1.3156 | 37000 | 0.1412 | - | - |
445
+ | 1.3511 | 38000 | 0.1416 | - | - |
446
+ | 1.3867 | 39000 | 0.1393 | - | - |
447
+ | 1.4222 | 40000 | 0.1389 | 0.2562 | 0.8269 |
448
+ | 1.4578 | 41000 | 0.1353 | - | - |
449
+ | 1.4933 | 42000 | 0.1284 | - | - |
450
+ | 1.5289 | 43000 | 0.1317 | - | - |
451
+ | 1.5644 | 44000 | 0.1242 | - | - |
452
+ | 1.6 | 45000 | 0.1228 | 0.2273 | 0.8243 |
453
+ | 1.6356 | 46000 | 0.1308 | - | - |
454
+ | 1.6711 | 47000 | 0.1231 | - | - |
455
+ | 1.7067 | 48000 | 0.1196 | - | - |
456
+ | 1.7422 | 49000 | 0.1202 | - | - |
457
+ | 1.7778 | 50000 | 0.12 | 0.2172 | 0.8276 |
458
+ | 1.8133 | 51000 | 0.1213 | - | - |
459
+ | 1.8489 | 52000 | 0.1134 | - | - |
460
+ | 1.8844 | 53000 | 0.109 | - | - |
461
+ | 1.92 | 54000 | 0.1158 | - | - |
462
+ | 1.9556 | 55000 | 0.1073 | 0.2292 | 0.8328 |
463
+ | 1.9911 | 56000 | 0.1053 | - | - |
464
+ | 2.0267 | 57000 | 0.0643 | - | - |
465
+ | 2.0622 | 58000 | 0.0546 | - | - |
466
+ | 2.0978 | 59000 | 0.054 | - | - |
467
+ | 2.1333 | 60000 | 0.0535 | 0.2791 | 0.8272 |
468
+ | 2.1689 | 61000 | 0.0512 | - | - |
469
+ | 2.2044 | 62000 | 0.0546 | - | - |
470
+ | 2.24 | 63000 | 0.0539 | - | - |
471
+ | 2.2756 | 64000 | 0.0561 | - | - |
472
+ | 2.3111 | 65000 | 0.0478 | 0.2562 | 0.8288 |
473
+ | 2.3467 | 66000 | 0.0555 | - | - |
474
+ | 2.3822 | 67000 | 0.0503 | - | - |
475
+ | 2.4178 | 68000 | 0.0449 | - | - |
476
+ | 2.4533 | 69000 | 0.0482 | - | - |
477
+ | 2.4889 | 70000 | 0.0493 | 0.2609 | 0.8312 |
478
+ | 2.5244 | 71000 | 0.0486 | - | - |
479
+ | 2.56 | 72000 | 0.0483 | - | - |
480
+ | 2.5956 | 73000 | 0.0444 | - | - |
481
+ | 2.6311 | 74000 | 0.0421 | - | - |
482
+ | 2.6667 | 75000 | 0.042 | 0.2553 | 0.8301 |
483
+ | 2.7022 | 76000 | 0.0409 | - | - |
484
+ | 2.7378 | 77000 | 0.0456 | - | - |
485
+ | 2.7733 | 78000 | 0.0411 | - | - |
486
+ | 2.8089 | 79000 | 0.0416 | - | - |
487
+ | 2.8444 | 80000 | 0.0392 | 0.2432 | 0.8311 |
488
+ | 2.88 | 81000 | 0.0365 | - | - |
489
+ | 2.9156 | 82000 | 0.042 | - | - |
490
+ | 2.9511 | 83000 | 0.0422 | - | - |
491
+ | 2.9867 | 84000 | 0.0343 | - | - |
492
+
493
+
494
+ ### Framework Versions
495
+ - Python: 3.11.13
496
+ - Sentence Transformers: 5.1.0
497
+ - Transformers: 4.57.0.dev0
498
+ - PyTorch: 2.10.0.dev20251003+cu130
499
+ - Accelerate: 1.10.1
500
+ - Datasets: 3.6.0
501
+ - Tokenizers: 0.22.1
502
+
503
+ ## Citation
504
+
505
+ ### BibTeX
506
+
507
+ #### Sentence Transformers
508
+ ```bibtex
509
+ @inproceedings{reimers-2019-sentence-bert,
510
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
511
+ author = "Reimers, Nils and Gurevych, Iryna",
512
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
513
+ month = "11",
514
+ year = "2019",
515
+ publisher = "Association for Computational Linguistics",
516
+ url = "https://arxiv.org/abs/1908.10084",
517
+ }
518
+ ```
519
+
520
+ #### GISTEmbedLoss
521
+ ```bibtex
522
+ @misc{solatorio2024gistembed,
523
+ title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
524
+ author={Aivin V. Solatorio},
525
+ year={2024},
526
+ eprint={2402.16829},
527
+ archivePrefix={arXiv},
528
+ primaryClass={cs.LG}
529
+ }
530
+ ```
531
+
532
+ <!--
533
+ ## Glossary
534
+
535
+ *Clearly define terms in order to be accessible across audiences.*
536
+ -->
537
+
538
+ <!--
539
+ ## Model Card Authors
540
+
541
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
542
+ -->
543
+
544
+ <!--
545
+ ## Model Card Contact
546
+
547
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
548
+ -->
config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 2,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 1,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 1,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
+ "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 160000,
29
+ "mask_token_id": 4,
30
+ "max_position_embeddings": 8192,
31
+ "mlp_bias": false,
32
+ "mlp_dropout": 0.0,
33
+ "model_type": "modernbert",
34
+ "norm_bias": false,
35
+ "norm_eps": 1e-05,
36
+ "num_attention_heads": 12,
37
+ "num_hidden_layers": 22,
38
+ "pad_token_id": 0,
39
+ "position_embedding_type": "sans_pos",
40
+ "repad_logits_with_grad": false,
41
+ "sep_token_id": 1,
42
+ "sparse_pred_ignore_index": -100,
43
+ "sparse_prediction": false,
44
+ "transformers_version": "4.57.0.dev0",
45
+ "vocab_size": 256000
46
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.0",
5
+ "transformers": "4.57.0.dev0",
6
+ "pytorch": "2.10.0.dev20251003+cu130"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea194f45f6543ea65c2dc20125146d8b94442ac5541de63646733f6765176da
3
+ size 1227771776
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3ead72c94c3bf18e088382ef3bc47263d5b72ab3cf6786aff4a1f0398ac5e50
3
+ size 2455632843
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36d9cc7a319d6acc5605384569f688e1c823b396a18a3a75cfdea3f3d919bf2a
3
+ size 14645
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:582f25e00d2790e066985dc3644444f5cfc0720236906190d3bba9257f1bcb8b
3
+ size 1383
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6d42a5a0c3f4b506a9233442fdc03895d7ba05d5558d3bad74a23e202608c7
3
+ size 1465
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<start_of_turn>",
4
+ "<end_of_turn>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<bos>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "cls_token": {
14
+ "content": "<bos>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "eos_token": {
21
+ "content": "<eos>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "mask_token": {
28
+ "content": "<mask>",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "pad_token": {
35
+ "content": "<pad>",
36
+ "lstrip": false,
37
+ "normalized": false,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ },
41
+ "sep_token": {
42
+ "content": "<eos>",
43
+ "lstrip": false,
44
+ "normalized": false,
45
+ "rstrip": false,
46
+ "single_word": false
47
+ },
48
+ "unk_token": {
49
+ "content": "<unk>",
50
+ "lstrip": false,
51
+ "normalized": false,
52
+ "rstrip": false,
53
+ "single_word": false
54
+ }
55
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd47075711f75a143d1b78e01a41cc65c1c591b00d3cfeffc23db07adce1392
3
+ size 34363442
tokenizer_config.json ADDED
@@ -0,0 +1,2018 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<eos>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<bos>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<2mass>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "6": {
53
+ "content": "[@BOS@]",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "7": {
61
+ "content": "<unused0>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
+ },
68
+ "8": {
69
+ "content": "<unused1>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": false
75
+ },
76
+ "9": {
77
+ "content": "<unused2>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
+ },
84
+ "10": {
85
+ "content": "<unused3>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": false
91
+ },
92
+ "11": {
93
+ "content": "<unused4>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": false
99
+ },
100
+ "12": {
101
+ "content": "<unused5>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": false
107
+ },
108
+ "13": {
109
+ "content": "<unused6>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": false
115
+ },
116
+ "14": {
117
+ "content": "<unused7>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "15": {
125
+ "content": "<unused8>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "16": {
133
+ "content": "<unused9>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "17": {
141
+ "content": "<unused10>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "18": {
149
+ "content": "<unused11>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "19": {
157
+ "content": "<unused12>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "20": {
165
+ "content": "<unused13>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "21": {
173
+ "content": "<unused14>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "22": {
181
+ "content": "<unused15>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "23": {
189
+ "content": "<unused16>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "24": {
197
+ "content": "<unused17>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
+ "25": {
205
+ "content": "<unused18>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": false
211
+ },
212
+ "26": {
213
+ "content": "<unused19>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": false
219
+ },
220
+ "27": {
221
+ "content": "<unused20>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": false
227
+ },
228
+ "28": {
229
+ "content": "<unused21>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": false
235
+ },
236
+ "29": {
237
+ "content": "<unused22>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": false
243
+ },
244
+ "30": {
245
+ "content": "<unused23>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": false
251
+ },
252
+ "31": {
253
+ "content": "<unused24>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": false
259
+ },
260
+ "32": {
261
+ "content": "<unused25>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": false
267
+ },
268
+ "33": {
269
+ "content": "<unused26>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": false
275
+ },
276
+ "34": {
277
+ "content": "<unused27>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": false
283
+ },
284
+ "35": {
285
+ "content": "<unused28>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": false
291
+ },
292
+ "36": {
293
+ "content": "<unused29>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": false
299
+ },
300
+ "37": {
301
+ "content": "<unused30>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": false
307
+ },
308
+ "38": {
309
+ "content": "<unused31>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": false
315
+ },
316
+ "39": {
317
+ "content": "<unused32>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": false
323
+ },
324
+ "40": {
325
+ "content": "<unused33>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": false
331
+ },
332
+ "41": {
333
+ "content": "<unused34>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": false
339
+ },
340
+ "42": {
341
+ "content": "<unused35>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": false
347
+ },
348
+ "43": {
349
+ "content": "<unused36>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": false
355
+ },
356
+ "44": {
357
+ "content": "<unused37>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": false
363
+ },
364
+ "45": {
365
+ "content": "<unused38>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": false
371
+ },
372
+ "46": {
373
+ "content": "<unused39>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": false
379
+ },
380
+ "47": {
381
+ "content": "<unused40>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": false
387
+ },
388
+ "48": {
389
+ "content": "<unused41>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": false
395
+ },
396
+ "49": {
397
+ "content": "<unused42>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": false
403
+ },
404
+ "50": {
405
+ "content": "<unused43>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": false
411
+ },
412
+ "51": {
413
+ "content": "<unused44>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": false
419
+ },
420
+ "52": {
421
+ "content": "<unused45>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": false
427
+ },
428
+ "53": {
429
+ "content": "<unused46>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": false
435
+ },
436
+ "54": {
437
+ "content": "<unused47>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": false
443
+ },
444
+ "55": {
445
+ "content": "<unused48>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": false
451
+ },
452
+ "56": {
453
+ "content": "<unused49>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": false
459
+ },
460
+ "57": {
461
+ "content": "<unused50>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": false
467
+ },
468
+ "58": {
469
+ "content": "<unused51>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": false
475
+ },
476
+ "59": {
477
+ "content": "<unused52>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": false
483
+ },
484
+ "60": {
485
+ "content": "<unused53>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": false
491
+ },
492
+ "61": {
493
+ "content": "<unused54>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": false
499
+ },
500
+ "62": {
501
+ "content": "<unused55>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": false
507
+ },
508
+ "63": {
509
+ "content": "<unused56>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": false
515
+ },
516
+ "64": {
517
+ "content": "<unused57>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": false
523
+ },
524
+ "65": {
525
+ "content": "<unused58>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": false
531
+ },
532
+ "66": {
533
+ "content": "<unused59>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": false
539
+ },
540
+ "67": {
541
+ "content": "<unused60>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": false
547
+ },
548
+ "68": {
549
+ "content": "<unused61>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": false
555
+ },
556
+ "69": {
557
+ "content": "<unused62>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": false
563
+ },
564
+ "70": {
565
+ "content": "<unused63>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": false
571
+ },
572
+ "71": {
573
+ "content": "<unused64>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": false
579
+ },
580
+ "72": {
581
+ "content": "<unused65>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": false
587
+ },
588
+ "73": {
589
+ "content": "<unused66>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": false
595
+ },
596
+ "74": {
597
+ "content": "<unused67>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": false
603
+ },
604
+ "75": {
605
+ "content": "<unused68>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": false
611
+ },
612
+ "76": {
613
+ "content": "<unused69>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": false
619
+ },
620
+ "77": {
621
+ "content": "<unused70>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": false
627
+ },
628
+ "78": {
629
+ "content": "<unused71>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": false
635
+ },
636
+ "79": {
637
+ "content": "<unused72>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": false
643
+ },
644
+ "80": {
645
+ "content": "<unused73>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": false
651
+ },
652
+ "81": {
653
+ "content": "<unused74>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": false
659
+ },
660
+ "82": {
661
+ "content": "<unused75>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": false
667
+ },
668
+ "83": {
669
+ "content": "<unused76>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": false
675
+ },
676
+ "84": {
677
+ "content": "<unused77>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": false
683
+ },
684
+ "85": {
685
+ "content": "<unused78>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": false
691
+ },
692
+ "86": {
693
+ "content": "<unused79>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": false
699
+ },
700
+ "87": {
701
+ "content": "<unused80>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": false
707
+ },
708
+ "88": {
709
+ "content": "<unused81>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": false
715
+ },
716
+ "89": {
717
+ "content": "<unused82>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": false
723
+ },
724
+ "90": {
725
+ "content": "<unused83>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": false
731
+ },
732
+ "91": {
733
+ "content": "<unused84>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": false
739
+ },
740
+ "92": {
741
+ "content": "<unused85>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": false
747
+ },
748
+ "93": {
749
+ "content": "<unused86>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": false
755
+ },
756
+ "94": {
757
+ "content": "<unused87>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": false
763
+ },
764
+ "95": {
765
+ "content": "<unused88>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": false
771
+ },
772
+ "96": {
773
+ "content": "<unused89>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": false
779
+ },
780
+ "97": {
781
+ "content": "<unused90>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": false
787
+ },
788
+ "98": {
789
+ "content": "<unused91>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": false
795
+ },
796
+ "99": {
797
+ "content": "<unused92>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": false
803
+ },
804
+ "100": {
805
+ "content": "<unused93>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": false
811
+ },
812
+ "101": {
813
+ "content": "<unused94>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": false
819
+ },
820
+ "102": {
821
+ "content": "<unused95>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": false
827
+ },
828
+ "103": {
829
+ "content": "<unused96>",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": false
835
+ },
836
+ "104": {
837
+ "content": "<unused97>",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": false
843
+ },
844
+ "105": {
845
+ "content": "<unused98>",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": false
851
+ },
852
+ "106": {
853
+ "content": "<start_of_turn>",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "107": {
861
+ "content": "<end_of_turn>",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "108": {
869
+ "content": "\n",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": false
875
+ },
876
+ "109": {
877
+ "content": "\n\n",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": false
883
+ },
884
+ "110": {
885
+ "content": "\n\n\n",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": false
891
+ },
892
+ "111": {
893
+ "content": "\n\n\n\n",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": false
899
+ },
900
+ "112": {
901
+ "content": "\n\n\n\n\n",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": false
907
+ },
908
+ "113": {
909
+ "content": "\n\n\n\n\n\n",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": false
915
+ },
916
+ "114": {
917
+ "content": "\n\n\n\n\n\n\n",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": false
923
+ },
924
+ "115": {
925
+ "content": "\n\n\n\n\n\n\n\n",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": false
931
+ },
932
+ "116": {
933
+ "content": "\n\n\n\n\n\n\n\n\n",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": false
939
+ },
940
+ "117": {
941
+ "content": "\n\n\n\n\n\n\n\n\n\n",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": false
947
+ },
948
+ "118": {
949
+ "content": "\n\n\n\n\n\n\n\n\n\n\n",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": false
955
+ },
956
+ "119": {
957
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": false
963
+ },
964
+ "120": {
965
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": false
971
+ },
972
+ "121": {
973
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": false
979
+ },
980
+ "122": {
981
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": false
987
+ },
988
+ "123": {
989
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": false
995
+ },
996
+ "124": {
997
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": false
1003
+ },
1004
+ "125": {
1005
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": false
1011
+ },
1012
+ "126": {
1013
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": false
1019
+ },
1020
+ "127": {
1021
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": false
1027
+ },
1028
+ "128": {
1029
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": false
1035
+ },
1036
+ "129": {
1037
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": false
1043
+ },
1044
+ "130": {
1045
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": false
1051
+ },
1052
+ "131": {
1053
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": false
1059
+ },
1060
+ "132": {
1061
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": false
1067
+ },
1068
+ "133": {
1069
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": false
1075
+ },
1076
+ "134": {
1077
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": false
1083
+ },
1084
+ "135": {
1085
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": false
1091
+ },
1092
+ "136": {
1093
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": false
1099
+ },
1100
+ "137": {
1101
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": false
1107
+ },
1108
+ "138": {
1109
+ "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": false
1115
+ },
1116
+ "139": {
1117
+ "content": "▁▁",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": false
1123
+ },
1124
+ "140": {
1125
+ "content": "▁▁▁",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": false
1131
+ },
1132
+ "141": {
1133
+ "content": "▁▁▁▁",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": false
1139
+ },
1140
+ "142": {
1141
+ "content": "▁▁▁▁▁",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": false
1147
+ },
1148
+ "143": {
1149
+ "content": "▁▁▁▁▁▁",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": false
1155
+ },
1156
+ "144": {
1157
+ "content": "▁▁▁▁▁▁▁",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": false
1163
+ },
1164
+ "145": {
1165
+ "content": "▁▁▁▁▁▁▁▁",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": false
1171
+ },
1172
+ "146": {
1173
+ "content": "▁▁▁▁▁▁▁▁▁",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": false
1179
+ },
1180
+ "147": {
1181
+ "content": "▁▁▁▁▁▁▁▁▁▁",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": false
1187
+ },
1188
+ "148": {
1189
+ "content": "▁▁▁▁▁▁▁▁▁▁▁",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": false
1195
+ },
1196
+ "149": {
1197
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": false
1203
+ },
1204
+ "150": {
1205
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": false
1211
+ },
1212
+ "151": {
1213
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": false
1219
+ },
1220
+ "152": {
1221
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": false
1227
+ },
1228
+ "153": {
1229
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": false
1235
+ },
1236
+ "154": {
1237
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": false
1243
+ },
1244
+ "155": {
1245
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": false
1251
+ },
1252
+ "156": {
1253
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": false
1259
+ },
1260
+ "157": {
1261
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": false
1267
+ },
1268
+ "158": {
1269
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": false
1275
+ },
1276
+ "159": {
1277
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": false
1283
+ },
1284
+ "160": {
1285
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": false
1291
+ },
1292
+ "161": {
1293
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": false
1299
+ },
1300
+ "162": {
1301
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": false
1307
+ },
1308
+ "163": {
1309
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": false
1315
+ },
1316
+ "164": {
1317
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": false
1323
+ },
1324
+ "165": {
1325
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": false
1331
+ },
1332
+ "166": {
1333
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": false
1339
+ },
1340
+ "167": {
1341
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": false
1347
+ },
1348
+ "168": {
1349
+ "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": false
1355
+ },
1356
+ "169": {
1357
+ "content": "<table>",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": false
1363
+ },
1364
+ "170": {
1365
+ "content": "<caption>",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": false
1371
+ },
1372
+ "171": {
1373
+ "content": "<thead>",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": false
1379
+ },
1380
+ "172": {
1381
+ "content": "<tbody>",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": false
1387
+ },
1388
+ "173": {
1389
+ "content": "<tfoot>",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": false
1395
+ },
1396
+ "174": {
1397
+ "content": "<tr>",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": false
1403
+ },
1404
+ "175": {
1405
+ "content": "<th>",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": false
1411
+ },
1412
+ "176": {
1413
+ "content": "<td>",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": false
1419
+ },
1420
+ "177": {
1421
+ "content": "</table>",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": false
1427
+ },
1428
+ "178": {
1429
+ "content": "</caption>",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": false
1435
+ },
1436
+ "179": {
1437
+ "content": "</thead>",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": false
1443
+ },
1444
+ "180": {
1445
+ "content": "</tbody>",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": false
1451
+ },
1452
+ "181": {
1453
+ "content": "</tfoot>",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": false
1459
+ },
1460
+ "182": {
1461
+ "content": "</tr>",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": false
1467
+ },
1468
+ "183": {
1469
+ "content": "</th>",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": false
1475
+ },
1476
+ "184": {
1477
+ "content": "</td>",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": false
1483
+ },
1484
+ "185": {
1485
+ "content": "<h1>",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": false
1491
+ },
1492
+ "186": {
1493
+ "content": "<h2>",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": false
1499
+ },
1500
+ "187": {
1501
+ "content": "<h3>",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": false
1507
+ },
1508
+ "188": {
1509
+ "content": "<h4>",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": false
1515
+ },
1516
+ "189": {
1517
+ "content": "<h5>",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": false
1523
+ },
1524
+ "190": {
1525
+ "content": "<h6>",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": false
1531
+ },
1532
+ "191": {
1533
+ "content": "<blockquote>",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": false
1539
+ },
1540
+ "192": {
1541
+ "content": "</h1>",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": false
1547
+ },
1548
+ "193": {
1549
+ "content": "</h2>",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": false
1555
+ },
1556
+ "194": {
1557
+ "content": "</h3>",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": false
1563
+ },
1564
+ "195": {
1565
+ "content": "</h4>",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": false
1571
+ },
1572
+ "196": {
1573
+ "content": "</h5>",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": false
1579
+ },
1580
+ "197": {
1581
+ "content": "</h6>",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": false
1587
+ },
1588
+ "198": {
1589
+ "content": "</blockquote>",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": false
1595
+ },
1596
+ "199": {
1597
+ "content": "<strong>",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": false
1603
+ },
1604
+ "200": {
1605
+ "content": "<em>",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": false
1611
+ },
1612
+ "201": {
1613
+ "content": "<b>",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": false
1619
+ },
1620
+ "202": {
1621
+ "content": "<i>",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": false
1627
+ },
1628
+ "203": {
1629
+ "content": "<u>",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": false
1635
+ },
1636
+ "204": {
1637
+ "content": "<s>",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": false
1643
+ },
1644
+ "205": {
1645
+ "content": "<sub>",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": false
1651
+ },
1652
+ "206": {
1653
+ "content": "<sup>",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": false
1659
+ },
1660
+ "207": {
1661
+ "content": "<code>",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": false
1667
+ },
1668
+ "208": {
1669
+ "content": "</strong>",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": false
1675
+ },
1676
+ "209": {
1677
+ "content": "</em>",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": false
1683
+ },
1684
+ "210": {
1685
+ "content": "</b>",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": false
1691
+ },
1692
+ "211": {
1693
+ "content": "</i>",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": false
1699
+ },
1700
+ "212": {
1701
+ "content": "</u>",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": false
1707
+ },
1708
+ "213": {
1709
+ "content": "</s>",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": false
1715
+ },
1716
+ "214": {
1717
+ "content": "</sub>",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": false
1723
+ },
1724
+ "215": {
1725
+ "content": "</sup>",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": false
1731
+ },
1732
+ "216": {
1733
+ "content": "</code>",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": false
1739
+ },
1740
+ "255968": {
1741
+ "content": "[toxicity=0]",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": false
1747
+ },
1748
+ "255969": {
1749
+ "content": "\t\t",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": false
1755
+ },
1756
+ "255970": {
1757
+ "content": "\t\t\t",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": false
1763
+ },
1764
+ "255971": {
1765
+ "content": "\t\t\t\t",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": false
1771
+ },
1772
+ "255972": {
1773
+ "content": "\t\t\t\t\t",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": false
1779
+ },
1780
+ "255973": {
1781
+ "content": "\t\t\t\t\t\t",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": false
1787
+ },
1788
+ "255974": {
1789
+ "content": "\t\t\t\t\t\t\t",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": false
1795
+ },
1796
+ "255975": {
1797
+ "content": "\t\t\t\t\t\t\t\t",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": false
1803
+ },
1804
+ "255976": {
1805
+ "content": "\t\t\t\t\t\t\t\t\t",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": false
1811
+ },
1812
+ "255977": {
1813
+ "content": "\t\t\t\t\t\t\t\t\t\t",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": false
1819
+ },
1820
+ "255978": {
1821
+ "content": "\t\t\t\t\t\t\t\t\t\t\t",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": false
1827
+ },
1828
+ "255979": {
1829
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": false
1835
+ },
1836
+ "255980": {
1837
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": false
1843
+ },
1844
+ "255981": {
1845
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": false
1851
+ },
1852
+ "255982": {
1853
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": false
1859
+ },
1860
+ "255983": {
1861
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": false
1867
+ },
1868
+ "255984": {
1869
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": false
1875
+ },
1876
+ "255985": {
1877
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": false
1883
+ },
1884
+ "255986": {
1885
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": false
1891
+ },
1892
+ "255987": {
1893
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": false
1899
+ },
1900
+ "255988": {
1901
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": false
1907
+ },
1908
+ "255989": {
1909
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": false
1915
+ },
1916
+ "255990": {
1917
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": false
1923
+ },
1924
+ "255991": {
1925
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": false
1931
+ },
1932
+ "255992": {
1933
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": false
1939
+ },
1940
+ "255993": {
1941
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": false
1947
+ },
1948
+ "255994": {
1949
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": false
1955
+ },
1956
+ "255995": {
1957
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": false
1963
+ },
1964
+ "255996": {
1965
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": false
1971
+ },
1972
+ "255997": {
1973
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": false
1979
+ },
1980
+ "255998": {
1981
+ "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": false
1987
+ },
1988
+ "255999": {
1989
+ "content": "<unused99>",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": false
1995
+ }
1996
+ },
1997
+ "additional_special_tokens": [
1998
+ "<start_of_turn>",
1999
+ "<end_of_turn>"
2000
+ ],
2001
+ "bos_token": "<bos>",
2002
+ "clean_up_tokenization_spaces": false,
2003
+ "cls_token": "<bos>",
2004
+ "eos_token": "<eos>",
2005
+ "extra_special_tokens": {},
2006
+ "mask_token": "<mask>",
2007
+ "model_input_names": [
2008
+ "input_ids",
2009
+ "attention_mask"
2010
+ ],
2011
+ "model_max_length": 8192,
2012
+ "pad_token": "<pad>",
2013
+ "padding_side": "right",
2014
+ "sep_token": "<eos>",
2015
+ "spaces_between_special_tokens": false,
2016
+ "tokenizer_class": "PreTrainedTokenizerFast",
2017
+ "unk_token": "<unk>"
2018
+ }
trainer_state.json ADDED
@@ -0,0 +1,782 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 5000,
7
+ "global_step": 84375,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.035555555555555556,
14
+ "grad_norm": 31.097620010375977,
15
+ "learning_rate": 5.91964920597298e-06,
16
+ "loss": 1.6025,
17
+ "step": 1000
18
+ },
19
+ {
20
+ "epoch": 0.07111111111111111,
21
+ "grad_norm": 15.004558563232422,
22
+ "learning_rate": 1.1845223986726713e-05,
23
+ "loss": 0.5208,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 0.10666666666666667,
28
+ "grad_norm": 18.05305290222168,
29
+ "learning_rate": 1.7770798767480447e-05,
30
+ "loss": 0.4374,
31
+ "step": 3000
32
+ },
33
+ {
34
+ "epoch": 0.14222222222222222,
35
+ "grad_norm": 15.441234588623047,
36
+ "learning_rate": 2.369637354823418e-05,
37
+ "loss": 0.4142,
38
+ "step": 4000
39
+ },
40
+ {
41
+ "epoch": 0.17777777777777778,
42
+ "grad_norm": 11.936789512634277,
43
+ "learning_rate": 2.9621948328987915e-05,
44
+ "loss": 0.3916,
45
+ "step": 5000
46
+ },
47
+ {
48
+ "epoch": 0.17777777777777778,
49
+ "eval_loss": 0.3775472640991211,
50
+ "eval_runtime": 11.6508,
51
+ "eval_samples_per_second": 85.831,
52
+ "eval_steps_per_second": 2.747,
53
+ "eval_sts-dev_pearson_cosine": 0.8150497373285317,
54
+ "eval_sts-dev_spearman_cosine": 0.8196131231126629,
55
+ "step": 5000
56
+ },
57
+ {
58
+ "epoch": 0.21333333333333335,
59
+ "grad_norm": 12.518898963928223,
60
+ "learning_rate": 3.554752310974165e-05,
61
+ "loss": 0.382,
62
+ "step": 6000
63
+ },
64
+ {
65
+ "epoch": 0.24888888888888888,
66
+ "grad_norm": 9.117196083068848,
67
+ "learning_rate": 4.147309789049538e-05,
68
+ "loss": 0.3824,
69
+ "step": 7000
70
+ },
71
+ {
72
+ "epoch": 0.28444444444444444,
73
+ "grad_norm": 9.207646369934082,
74
+ "learning_rate": 4.739867267124911e-05,
75
+ "loss": 0.3744,
76
+ "step": 8000
77
+ },
78
+ {
79
+ "epoch": 0.32,
80
+ "grad_norm": 8.699370384216309,
81
+ "learning_rate": 4.9630614851785036e-05,
82
+ "loss": 0.3781,
83
+ "step": 9000
84
+ },
85
+ {
86
+ "epoch": 0.35555555555555557,
87
+ "grad_norm": 5.3027729988098145,
88
+ "learning_rate": 4.897217430238224e-05,
89
+ "loss": 0.367,
90
+ "step": 10000
91
+ },
92
+ {
93
+ "epoch": 0.35555555555555557,
94
+ "eval_loss": 0.37576642632484436,
95
+ "eval_runtime": 35.1214,
96
+ "eval_samples_per_second": 28.473,
97
+ "eval_steps_per_second": 0.911,
98
+ "eval_sts-dev_pearson_cosine": 0.8076134919907783,
99
+ "eval_sts-dev_spearman_cosine": 0.8136231582786481,
100
+ "step": 10000
101
+ },
102
+ {
103
+ "epoch": 0.39111111111111113,
104
+ "grad_norm": 13.608588218688965,
105
+ "learning_rate": 4.831373375297945e-05,
106
+ "loss": 0.3527,
107
+ "step": 11000
108
+ },
109
+ {
110
+ "epoch": 0.4266666666666667,
111
+ "grad_norm": 2.712616443634033,
112
+ "learning_rate": 4.765529320357665e-05,
113
+ "loss": 0.3354,
114
+ "step": 12000
115
+ },
116
+ {
117
+ "epoch": 0.4622222222222222,
118
+ "grad_norm": 7.297984600067139,
119
+ "learning_rate": 4.699685265417386e-05,
120
+ "loss": 0.3147,
121
+ "step": 13000
122
+ },
123
+ {
124
+ "epoch": 0.49777777777777776,
125
+ "grad_norm": 3.9520838260650635,
126
+ "learning_rate": 4.633841210477106e-05,
127
+ "loss": 0.3084,
128
+ "step": 14000
129
+ },
130
+ {
131
+ "epoch": 0.5333333333333333,
132
+ "grad_norm": 3.8519201278686523,
133
+ "learning_rate": 4.567997155536827e-05,
134
+ "loss": 0.2975,
135
+ "step": 15000
136
+ },
137
+ {
138
+ "epoch": 0.5333333333333333,
139
+ "eval_loss": 0.35147717595100403,
140
+ "eval_runtime": 30.6971,
141
+ "eval_samples_per_second": 32.576,
142
+ "eval_steps_per_second": 1.042,
143
+ "eval_sts-dev_pearson_cosine": 0.8128354003998368,
144
+ "eval_sts-dev_spearman_cosine": 0.8205304242394997,
145
+ "step": 15000
146
+ },
147
+ {
148
+ "epoch": 0.5688888888888889,
149
+ "grad_norm": 7.860720634460449,
150
+ "learning_rate": 4.502153100596547e-05,
151
+ "loss": 0.2978,
152
+ "step": 16000
153
+ },
154
+ {
155
+ "epoch": 0.6044444444444445,
156
+ "grad_norm": 7.748737335205078,
157
+ "learning_rate": 4.436309045656268e-05,
158
+ "loss": 0.2916,
159
+ "step": 17000
160
+ },
161
+ {
162
+ "epoch": 0.64,
163
+ "grad_norm": 8.615702629089355,
164
+ "learning_rate": 4.370464990715988e-05,
165
+ "loss": 0.2792,
166
+ "step": 18000
167
+ },
168
+ {
169
+ "epoch": 0.6755555555555556,
170
+ "grad_norm": 5.2580156326293945,
171
+ "learning_rate": 4.304620935775709e-05,
172
+ "loss": 0.2661,
173
+ "step": 19000
174
+ },
175
+ {
176
+ "epoch": 0.7111111111111111,
177
+ "grad_norm": 9.318315505981445,
178
+ "learning_rate": 4.238776880835429e-05,
179
+ "loss": 0.2583,
180
+ "step": 20000
181
+ },
182
+ {
183
+ "epoch": 0.7111111111111111,
184
+ "eval_loss": 0.3185396194458008,
185
+ "eval_runtime": 30.8313,
186
+ "eval_samples_per_second": 32.435,
187
+ "eval_steps_per_second": 1.038,
188
+ "eval_sts-dev_pearson_cosine": 0.8058349279884176,
189
+ "eval_sts-dev_spearman_cosine": 0.8159206996876398,
190
+ "step": 20000
191
+ },
192
+ {
193
+ "epoch": 0.7466666666666667,
194
+ "grad_norm": 4.720574855804443,
195
+ "learning_rate": 4.17293282589515e-05,
196
+ "loss": 0.2529,
197
+ "step": 21000
198
+ },
199
+ {
200
+ "epoch": 0.7822222222222223,
201
+ "grad_norm": 6.6863484382629395,
202
+ "learning_rate": 4.10708877095487e-05,
203
+ "loss": 0.2505,
204
+ "step": 22000
205
+ },
206
+ {
207
+ "epoch": 0.8177777777777778,
208
+ "grad_norm": 3.1214375495910645,
209
+ "learning_rate": 4.041244716014591e-05,
210
+ "loss": 0.2454,
211
+ "step": 23000
212
+ },
213
+ {
214
+ "epoch": 0.8533333333333334,
215
+ "grad_norm": 0.6488115787506104,
216
+ "learning_rate": 3.9754006610743114e-05,
217
+ "loss": 0.242,
218
+ "step": 24000
219
+ },
220
+ {
221
+ "epoch": 0.8888888888888888,
222
+ "grad_norm": 5.3746113777160645,
223
+ "learning_rate": 3.909556606134032e-05,
224
+ "loss": 0.2307,
225
+ "step": 25000
226
+ },
227
+ {
228
+ "epoch": 0.8888888888888888,
229
+ "eval_loss": 0.28817781805992126,
230
+ "eval_runtime": 31.3269,
231
+ "eval_samples_per_second": 31.921,
232
+ "eval_steps_per_second": 1.021,
233
+ "eval_sts-dev_pearson_cosine": 0.8068056591060926,
234
+ "eval_sts-dev_spearman_cosine": 0.8175004911072571,
235
+ "step": 25000
236
+ },
237
+ {
238
+ "epoch": 0.9244444444444444,
239
+ "grad_norm": 7.79231595993042,
240
+ "learning_rate": 3.8437125511937525e-05,
241
+ "loss": 0.2349,
242
+ "step": 26000
243
+ },
244
+ {
245
+ "epoch": 0.96,
246
+ "grad_norm": 9.042169570922852,
247
+ "learning_rate": 3.7778684962534734e-05,
248
+ "loss": 0.2238,
249
+ "step": 27000
250
+ },
251
+ {
252
+ "epoch": 0.9955555555555555,
253
+ "grad_norm": 10.183342933654785,
254
+ "learning_rate": 3.7120244413131936e-05,
255
+ "loss": 0.2132,
256
+ "step": 28000
257
+ },
258
+ {
259
+ "epoch": 1.031111111111111,
260
+ "grad_norm": 1.6765131950378418,
261
+ "learning_rate": 3.6461803863729144e-05,
262
+ "loss": 0.1601,
263
+ "step": 29000
264
+ },
265
+ {
266
+ "epoch": 1.0666666666666667,
267
+ "grad_norm": 2.5507757663726807,
268
+ "learning_rate": 3.5803363314326346e-05,
269
+ "loss": 0.1581,
270
+ "step": 30000
271
+ },
272
+ {
273
+ "epoch": 1.0666666666666667,
274
+ "eval_loss": 0.2580932080745697,
275
+ "eval_runtime": 31.0615,
276
+ "eval_samples_per_second": 32.194,
277
+ "eval_steps_per_second": 1.03,
278
+ "eval_sts-dev_pearson_cosine": 0.8157474163754647,
279
+ "eval_sts-dev_spearman_cosine": 0.8222299144499523,
280
+ "step": 30000
281
+ },
282
+ {
283
+ "epoch": 1.1022222222222222,
284
+ "grad_norm": 2.3133509159088135,
285
+ "learning_rate": 3.5144922764923555e-05,
286
+ "loss": 0.1532,
287
+ "step": 31000
288
+ },
289
+ {
290
+ "epoch": 1.1377777777777778,
291
+ "grad_norm": 2.026036262512207,
292
+ "learning_rate": 3.448648221552076e-05,
293
+ "loss": 0.1494,
294
+ "step": 32000
295
+ },
296
+ {
297
+ "epoch": 1.1733333333333333,
298
+ "grad_norm": 5.352447986602783,
299
+ "learning_rate": 3.3828041666117966e-05,
300
+ "loss": 0.1484,
301
+ "step": 33000
302
+ },
303
+ {
304
+ "epoch": 1.208888888888889,
305
+ "grad_norm": 2.9670166969299316,
306
+ "learning_rate": 3.316960111671517e-05,
307
+ "loss": 0.1529,
308
+ "step": 34000
309
+ },
310
+ {
311
+ "epoch": 1.2444444444444445,
312
+ "grad_norm": 6.357540607452393,
313
+ "learning_rate": 3.251116056731238e-05,
314
+ "loss": 0.1467,
315
+ "step": 35000
316
+ },
317
+ {
318
+ "epoch": 1.2444444444444445,
319
+ "eval_loss": 0.24440895020961761,
320
+ "eval_runtime": 31.2456,
321
+ "eval_samples_per_second": 32.005,
322
+ "eval_steps_per_second": 1.024,
323
+ "eval_sts-dev_pearson_cosine": 0.814902241190906,
324
+ "eval_sts-dev_spearman_cosine": 0.8211414049575237,
325
+ "step": 35000
326
+ },
327
+ {
328
+ "epoch": 1.28,
329
+ "grad_norm": 6.782201766967773,
330
+ "learning_rate": 3.185272001790958e-05,
331
+ "loss": 0.1522,
332
+ "step": 36000
333
+ },
334
+ {
335
+ "epoch": 1.3155555555555556,
336
+ "grad_norm": 0.9291681051254272,
337
+ "learning_rate": 3.119427946850679e-05,
338
+ "loss": 0.1412,
339
+ "step": 37000
340
+ },
341
+ {
342
+ "epoch": 1.3511111111111112,
343
+ "grad_norm": 6.588221073150635,
344
+ "learning_rate": 3.053583891910399e-05,
345
+ "loss": 0.1416,
346
+ "step": 38000
347
+ },
348
+ {
349
+ "epoch": 1.3866666666666667,
350
+ "grad_norm": 7.346938133239746,
351
+ "learning_rate": 2.9877398369701205e-05,
352
+ "loss": 0.1393,
353
+ "step": 39000
354
+ },
355
+ {
356
+ "epoch": 1.4222222222222223,
357
+ "grad_norm": 7.207787990570068,
358
+ "learning_rate": 2.921895782029841e-05,
359
+ "loss": 0.1389,
360
+ "step": 40000
361
+ },
362
+ {
363
+ "epoch": 1.4222222222222223,
364
+ "eval_loss": 0.2562263309955597,
365
+ "eval_runtime": 31.2811,
366
+ "eval_samples_per_second": 31.968,
367
+ "eval_steps_per_second": 1.023,
368
+ "eval_sts-dev_pearson_cosine": 0.8227387520545976,
369
+ "eval_sts-dev_spearman_cosine": 0.826905561534745,
370
+ "step": 40000
371
+ },
372
+ {
373
+ "epoch": 1.4577777777777778,
374
+ "grad_norm": 2.1179184913635254,
375
+ "learning_rate": 2.8560517270895616e-05,
376
+ "loss": 0.1353,
377
+ "step": 41000
378
+ },
379
+ {
380
+ "epoch": 1.4933333333333334,
381
+ "grad_norm": 2.6916444301605225,
382
+ "learning_rate": 2.790207672149282e-05,
383
+ "loss": 0.1284,
384
+ "step": 42000
385
+ },
386
+ {
387
+ "epoch": 1.528888888888889,
388
+ "grad_norm": 9.770589828491211,
389
+ "learning_rate": 2.7243636172090027e-05,
390
+ "loss": 0.1317,
391
+ "step": 43000
392
+ },
393
+ {
394
+ "epoch": 1.5644444444444443,
395
+ "grad_norm": 2.033369779586792,
396
+ "learning_rate": 2.6585195622687232e-05,
397
+ "loss": 0.1242,
398
+ "step": 44000
399
+ },
400
+ {
401
+ "epoch": 1.6,
402
+ "grad_norm": 9.705333709716797,
403
+ "learning_rate": 2.5926755073284438e-05,
404
+ "loss": 0.1228,
405
+ "step": 45000
406
+ },
407
+ {
408
+ "epoch": 1.6,
409
+ "eval_loss": 0.22733546793460846,
410
+ "eval_runtime": 30.7521,
411
+ "eval_samples_per_second": 32.518,
412
+ "eval_steps_per_second": 1.041,
413
+ "eval_sts-dev_pearson_cosine": 0.8129372102759819,
414
+ "eval_sts-dev_spearman_cosine": 0.8243194932998689,
415
+ "step": 45000
416
+ },
417
+ {
418
+ "epoch": 1.6355555555555554,
419
+ "grad_norm": 6.558931827545166,
420
+ "learning_rate": 2.5268314523881643e-05,
421
+ "loss": 0.1308,
422
+ "step": 46000
423
+ },
424
+ {
425
+ "epoch": 1.6711111111111112,
426
+ "grad_norm": 2.8348495960235596,
427
+ "learning_rate": 2.4609873974478845e-05,
428
+ "loss": 0.1231,
429
+ "step": 47000
430
+ },
431
+ {
432
+ "epoch": 1.7066666666666666,
433
+ "grad_norm": 10.423678398132324,
434
+ "learning_rate": 2.395143342507605e-05,
435
+ "loss": 0.1196,
436
+ "step": 48000
437
+ },
438
+ {
439
+ "epoch": 1.7422222222222223,
440
+ "grad_norm": 4.737322807312012,
441
+ "learning_rate": 2.3292992875673256e-05,
442
+ "loss": 0.1202,
443
+ "step": 49000
444
+ },
445
+ {
446
+ "epoch": 1.7777777777777777,
447
+ "grad_norm": 8.491903305053711,
448
+ "learning_rate": 2.263455232627046e-05,
449
+ "loss": 0.12,
450
+ "step": 50000
451
+ },
452
+ {
453
+ "epoch": 1.7777777777777777,
454
+ "eval_loss": 0.21721433103084564,
455
+ "eval_runtime": 48.5394,
456
+ "eval_samples_per_second": 20.602,
457
+ "eval_steps_per_second": 0.659,
458
+ "eval_sts-dev_pearson_cosine": 0.8221879391828698,
459
+ "eval_sts-dev_spearman_cosine": 0.8276168454271825,
460
+ "step": 50000
461
+ },
462
+ {
463
+ "epoch": 1.8133333333333335,
464
+ "grad_norm": 7.367649078369141,
465
+ "learning_rate": 2.1976111776867667e-05,
466
+ "loss": 0.1213,
467
+ "step": 51000
468
+ },
469
+ {
470
+ "epoch": 1.8488888888888888,
471
+ "grad_norm": 3.9367151260375977,
472
+ "learning_rate": 2.1317671227464872e-05,
473
+ "loss": 0.1134,
474
+ "step": 52000
475
+ },
476
+ {
477
+ "epoch": 1.8844444444444446,
478
+ "grad_norm": 0.3449944853782654,
479
+ "learning_rate": 2.0659230678062077e-05,
480
+ "loss": 0.109,
481
+ "step": 53000
482
+ },
483
+ {
484
+ "epoch": 1.92,
485
+ "grad_norm": 1.9318583011627197,
486
+ "learning_rate": 2.0000790128659283e-05,
487
+ "loss": 0.1158,
488
+ "step": 54000
489
+ },
490
+ {
491
+ "epoch": 1.9555555555555557,
492
+ "grad_norm": 2.4510562419891357,
493
+ "learning_rate": 1.9342349579256488e-05,
494
+ "loss": 0.1073,
495
+ "step": 55000
496
+ },
497
+ {
498
+ "epoch": 1.9555555555555557,
499
+ "eval_loss": 0.22918041050434113,
500
+ "eval_runtime": 48.7814,
501
+ "eval_samples_per_second": 20.5,
502
+ "eval_steps_per_second": 0.656,
503
+ "eval_sts-dev_pearson_cosine": 0.8226444171661187,
504
+ "eval_sts-dev_spearman_cosine": 0.832783411507352,
505
+ "step": 55000
506
+ },
507
+ {
508
+ "epoch": 1.991111111111111,
509
+ "grad_norm": 8.485190391540527,
510
+ "learning_rate": 1.8683909029853697e-05,
511
+ "loss": 0.1053,
512
+ "step": 56000
513
+ },
514
+ {
515
+ "epoch": 2.026666666666667,
516
+ "grad_norm": 0.6446801424026489,
517
+ "learning_rate": 1.8025468480450902e-05,
518
+ "loss": 0.0643,
519
+ "step": 57000
520
+ },
521
+ {
522
+ "epoch": 2.062222222222222,
523
+ "grad_norm": 9.16294002532959,
524
+ "learning_rate": 1.7367027931048108e-05,
525
+ "loss": 0.0546,
526
+ "step": 58000
527
+ },
528
+ {
529
+ "epoch": 2.097777777777778,
530
+ "grad_norm": 4.509792327880859,
531
+ "learning_rate": 1.6708587381645313e-05,
532
+ "loss": 0.054,
533
+ "step": 59000
534
+ },
535
+ {
536
+ "epoch": 2.1333333333333333,
537
+ "grad_norm": 0.07099995762109756,
538
+ "learning_rate": 1.605014683224252e-05,
539
+ "loss": 0.0535,
540
+ "step": 60000
541
+ },
542
+ {
543
+ "epoch": 2.1333333333333333,
544
+ "eval_loss": 0.279130220413208,
545
+ "eval_runtime": 55.8907,
546
+ "eval_samples_per_second": 17.892,
547
+ "eval_steps_per_second": 0.573,
548
+ "eval_sts-dev_pearson_cosine": 0.8213308892826775,
549
+ "eval_sts-dev_spearman_cosine": 0.8271747105077873,
550
+ "step": 60000
551
+ },
552
+ {
553
+ "epoch": 2.168888888888889,
554
+ "grad_norm": 1.7394822835922241,
555
+ "learning_rate": 1.5391706282839724e-05,
556
+ "loss": 0.0512,
557
+ "step": 61000
558
+ },
559
+ {
560
+ "epoch": 2.2044444444444444,
561
+ "grad_norm": 7.789730548858643,
562
+ "learning_rate": 1.473326573343693e-05,
563
+ "loss": 0.0546,
564
+ "step": 62000
565
+ },
566
+ {
567
+ "epoch": 2.24,
568
+ "grad_norm": 10.078393936157227,
569
+ "learning_rate": 1.4074825184034135e-05,
570
+ "loss": 0.0539,
571
+ "step": 63000
572
+ },
573
+ {
574
+ "epoch": 2.2755555555555556,
575
+ "grad_norm": 2.606584310531616,
576
+ "learning_rate": 1.341638463463134e-05,
577
+ "loss": 0.0561,
578
+ "step": 64000
579
+ },
580
+ {
581
+ "epoch": 2.311111111111111,
582
+ "grad_norm": 4.760853290557861,
583
+ "learning_rate": 1.2757944085228546e-05,
584
+ "loss": 0.0478,
585
+ "step": 65000
586
+ },
587
+ {
588
+ "epoch": 2.311111111111111,
589
+ "eval_loss": 0.256197065114975,
590
+ "eval_runtime": 55.5249,
591
+ "eval_samples_per_second": 18.01,
592
+ "eval_steps_per_second": 0.576,
593
+ "eval_sts-dev_pearson_cosine": 0.8246413401396585,
594
+ "eval_sts-dev_spearman_cosine": 0.8288094370870545,
595
+ "step": 65000
596
+ },
597
+ {
598
+ "epoch": 2.3466666666666667,
599
+ "grad_norm": 1.3447166681289673,
600
+ "learning_rate": 1.2099503535825751e-05,
601
+ "loss": 0.0555,
602
+ "step": 66000
603
+ },
604
+ {
605
+ "epoch": 2.3822222222222225,
606
+ "grad_norm": 2.37924861907959,
607
+ "learning_rate": 1.1441062986422956e-05,
608
+ "loss": 0.0503,
609
+ "step": 67000
610
+ },
611
+ {
612
+ "epoch": 2.417777777777778,
613
+ "grad_norm": 1.2358015775680542,
614
+ "learning_rate": 1.0782622437020162e-05,
615
+ "loss": 0.0449,
616
+ "step": 68000
617
+ },
618
+ {
619
+ "epoch": 2.453333333333333,
620
+ "grad_norm": 0.22912859916687012,
621
+ "learning_rate": 1.0124181887617367e-05,
622
+ "loss": 0.0482,
623
+ "step": 69000
624
+ },
625
+ {
626
+ "epoch": 2.488888888888889,
627
+ "grad_norm": 9.296670913696289,
628
+ "learning_rate": 9.465741338214573e-06,
629
+ "loss": 0.0493,
630
+ "step": 70000
631
+ },
632
+ {
633
+ "epoch": 2.488888888888889,
634
+ "eval_loss": 0.26088747382164,
635
+ "eval_runtime": 55.5071,
636
+ "eval_samples_per_second": 18.016,
637
+ "eval_steps_per_second": 0.577,
638
+ "eval_sts-dev_pearson_cosine": 0.8255009970943029,
639
+ "eval_sts-dev_spearman_cosine": 0.831174307235804,
640
+ "step": 70000
641
+ },
642
+ {
643
+ "epoch": 2.5244444444444447,
644
+ "grad_norm": 1.4696452617645264,
645
+ "learning_rate": 8.807300788811778e-06,
646
+ "loss": 0.0486,
647
+ "step": 71000
648
+ },
649
+ {
650
+ "epoch": 2.56,
651
+ "grad_norm": 0.014002230018377304,
652
+ "learning_rate": 8.148860239408983e-06,
653
+ "loss": 0.0483,
654
+ "step": 72000
655
+ },
656
+ {
657
+ "epoch": 2.5955555555555554,
658
+ "grad_norm": 10.814248085021973,
659
+ "learning_rate": 7.49041969000619e-06,
660
+ "loss": 0.0444,
661
+ "step": 73000
662
+ },
663
+ {
664
+ "epoch": 2.631111111111111,
665
+ "grad_norm": 0.9171813726425171,
666
+ "learning_rate": 6.831979140603395e-06,
667
+ "loss": 0.0421,
668
+ "step": 74000
669
+ },
670
+ {
671
+ "epoch": 2.6666666666666665,
672
+ "grad_norm": 0.6967930197715759,
673
+ "learning_rate": 6.1735385912006005e-06,
674
+ "loss": 0.042,
675
+ "step": 75000
676
+ },
677
+ {
678
+ "epoch": 2.6666666666666665,
679
+ "eval_loss": 0.25533053278923035,
680
+ "eval_runtime": 55.7954,
681
+ "eval_samples_per_second": 17.923,
682
+ "eval_steps_per_second": 0.574,
683
+ "eval_sts-dev_pearson_cosine": 0.8229336010617173,
684
+ "eval_sts-dev_spearman_cosine": 0.8301305406762217,
685
+ "step": 75000
686
+ },
687
+ {
688
+ "epoch": 2.7022222222222223,
689
+ "grad_norm": 5.206503391265869,
690
+ "learning_rate": 5.515098041797807e-06,
691
+ "loss": 0.0409,
692
+ "step": 76000
693
+ },
694
+ {
695
+ "epoch": 2.7377777777777776,
696
+ "grad_norm": 8.96776008605957,
697
+ "learning_rate": 4.856657492395012e-06,
698
+ "loss": 0.0456,
699
+ "step": 77000
700
+ },
701
+ {
702
+ "epoch": 2.7733333333333334,
703
+ "grad_norm": 1.4853557348251343,
704
+ "learning_rate": 4.1982169429922175e-06,
705
+ "loss": 0.0411,
706
+ "step": 78000
707
+ },
708
+ {
709
+ "epoch": 2.8088888888888888,
710
+ "grad_norm": 0.08554021269083023,
711
+ "learning_rate": 3.539776393589423e-06,
712
+ "loss": 0.0416,
713
+ "step": 79000
714
+ },
715
+ {
716
+ "epoch": 2.8444444444444446,
717
+ "grad_norm": 0.010798577219247818,
718
+ "learning_rate": 2.8813358441866287e-06,
719
+ "loss": 0.0392,
720
+ "step": 80000
721
+ },
722
+ {
723
+ "epoch": 2.8444444444444446,
724
+ "eval_loss": 0.24324475228786469,
725
+ "eval_runtime": 55.6948,
726
+ "eval_samples_per_second": 17.955,
727
+ "eval_steps_per_second": 0.575,
728
+ "eval_sts-dev_pearson_cosine": 0.8258729995042317,
729
+ "eval_sts-dev_spearman_cosine": 0.8310916968189412,
730
+ "step": 80000
731
+ },
732
+ {
733
+ "epoch": 2.88,
734
+ "grad_norm": 1.3684927225112915,
735
+ "learning_rate": 2.222895294783834e-06,
736
+ "loss": 0.0365,
737
+ "step": 81000
738
+ },
739
+ {
740
+ "epoch": 2.9155555555555557,
741
+ "grad_norm": 21.493656158447266,
742
+ "learning_rate": 1.5644547453810395e-06,
743
+ "loss": 0.042,
744
+ "step": 82000
745
+ },
746
+ {
747
+ "epoch": 2.951111111111111,
748
+ "grad_norm": 7.1114654541015625,
749
+ "learning_rate": 9.06014195978245e-07,
750
+ "loss": 0.0422,
751
+ "step": 83000
752
+ },
753
+ {
754
+ "epoch": 2.986666666666667,
755
+ "grad_norm": 0.7839590907096863,
756
+ "learning_rate": 2.475736465754507e-07,
757
+ "loss": 0.0343,
758
+ "step": 84000
759
+ }
760
+ ],
761
+ "logging_steps": 1000,
762
+ "max_steps": 84375,
763
+ "num_input_tokens_seen": 0,
764
+ "num_train_epochs": 3,
765
+ "save_steps": 5000,
766
+ "stateful_callbacks": {
767
+ "TrainerControl": {
768
+ "args": {
769
+ "should_epoch_stop": false,
770
+ "should_evaluate": false,
771
+ "should_log": false,
772
+ "should_save": true,
773
+ "should_training_stop": true
774
+ },
775
+ "attributes": {}
776
+ }
777
+ },
778
+ "total_flos": 0.0,
779
+ "train_batch_size": 32,
780
+ "trial_name": null,
781
+ "trial_params": null
782
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b48f82cd083eebdae604861bbcf20c7715f2142e5c0b99f7e52f5e232486b883
3
+ size 5777