Wolfvin commited on
Commit
e6557b8
·
verified ·
1 Parent(s): 7524420

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +1059 -908
tokenizer.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "config": {
3
- "bpe_vocab_size": 28000,
4
  "max_sentences": 32,
5
  "sentence_boundary_token": "<sent>",
6
  "pad_token": "<pad>",
@@ -33,932 +33,1083 @@
33
  "2": 19,
34
  "3": 20,
35
  "4": 21,
36
- "6": 22,
37
- "7": 23,
38
- "8": 24,
39
- "9": 25,
40
- ":": 26,
41
- ";": 27,
42
- "?": 28,
43
- "_": 29,
44
- "a": 30,
45
- "b": 31,
46
- "c": 32,
47
- "d": 33,
48
- "e": 34,
49
- "f": 35,
50
- "g": 36,
51
- "h": 37,
52
- "i": 38,
53
- "j": 39,
54
- "k": 40,
55
- "l": 41,
56
- "m": 42,
57
- "n": 43,
58
- "o": 44,
59
- "p": 45,
60
- "r": 46,
61
- "s": 47,
62
- "t": 48,
63
- "u": 49,
64
- "v": 50,
65
- "w": 51,
66
- "y": 52,
67
- "z": 53,
68
- "an": 54,
69
- "an</w>": 55,
70
- "er": 56,
71
- "en": 57,
72
  "da": 58,
73
- "ti": 59,
74
- "il": 60,
75
- "si": 61,
76
- "di": 62,
77
- "ang</w>": 63,
78
- "si</w>": 64,
79
- "anc": 65,
80
- "kan</w>": 66,
81
  "al": 67,
82
- "su": 68,
83
- "ang": 69,
84
- "ri</w>": 70,
85
- "ke": 71,
86
- "ef": 72,
87
- "ter": 73,
88
- "se": 74,
89
- "te": 75,
90
- "pa": 76,
91
- "ng": 77,
92
- "on</w>": 78,
93
- "on": 79,
94
- "hef": 80,
95
- "hefe": 81,
96
- "enc": 82,
97
- "or": 83,
98
- "la": 84,
99
- "sim": 85,
100
- "ul": 86,
101
- "tida": 87,
102
- "ar": 88,
103
- "eng": 89,
104
  "dari</w>": 90,
105
- "re": 91,
106
- "bu": 92,
107
- "ance</w>": 93,
108
- "ra": 94,
109
- "om": 95,
110
- "hefei</w>": 96,
111
- "jang": 97,
112
- "sa": 98,
113
- "ju</w>": 99,
114
- "jangm": 100,
115
- "jangmo": 101,
116
- "jangmok</w>": 102,
117
- "al</w>": 103,
118
- "os": 104,
119
- "dianc": 105,
120
- "diancang</w>": 106,
121
- "ai": 107,
122
- "in": 108,
123
- "ja": 109,
124
- "kon": 110,
125
- "li": 111,
126
- "ct</w>": 112,
127
- "tidak</w>": 113,
128
- "eri": 114,
129
- "fi": 115,
130
- "meng": 116,
131
- "asi</w>": 117,
132
- "kesim": 118,
133
- "kesimp": 119,
134
- "kesimpul": 120,
135
- "kesimpulan</w>": 121,
136
- "di</w>": 122,
137
  "ngkan</w>": 123,
138
- "ksi</w>": 124,
139
- "pi": 125,
140
- "ya</w>": 126,
141
- "yang</w>": 127,
142
- "encu": 128,
143
- "ta": 129,
144
- "buk": 130,
145
- "bukt": 131,
146
- "bukti</w>": 132,
147
- "pen": 133,
148
- "per": 134,
149
- "lu": 135,
150
- "le": 136,
151
- "fiv": 137,
152
- "five</w>": 138,
153
- "sw": 139,
154
- "swor": 140,
155
- "sword": 141,
156
- "swords</w>": 142,
157
- "pencu": 143,
158
- "ence</w>": 144,
159
- "ce": 145,
160
- "ku": 146,
161
- "ili": 147,
162
- "sn": 148,
163
- "sno": 149,
164
- "snow</w>": 150,
165
- "plu": 151,
166
- "plum</w>": 152,
167
- "pil": 153,
168
- "pill</w>": 154,
169
- "mengh": 155,
170
- "menghil": 156,
171
- "menghilang</w>": 157,
172
  "lo": 158,
173
- "bi": 159,
174
- "de": 160,
175
- "anom": 161,
176
- "anomal": 162,
177
- "mar": 163,
178
- "marti": 164,
179
- "martial</w>": 165,
180
- "alli": 166,
181
- "alliance</w>": 167,
182
- "mu": 168,
183
- "anal": 169,
184
- "anali": 170,
185
- "analisi": 171,
186
- "analisis</w>": 172,
187
- "gy": 173,
188
- "gyer": 174,
189
- "gyery": 175,
190
- "gyeryon": 176,
191
- "gyeryong</w>": 177,
192
- "mer": 178,
193
- "merc": 179,
194
- "merch": 180,
195
- "merchan": 181,
196
- "merchant</w>": 182,
197
- "gu": 183,
198
- "guil": 184,
199
- "guild</w>": 185,
200
- "ha": 186,
201
- "cr": 187,
202
- "cros": 188,
203
- "cross</w>": 189,
204
- "ref": 190,
205
- "refer": 191,
206
- "reference</w>": 192,
207
- "keja": 193,
208
- "kejadi": 194,
209
- "kejadian</w>": 195,
210
- "simh": 196,
211
- "simhy": 197,
212
- "simhye": 198,
213
- "simhyeon</w>": 199,
214
- "pav": 200,
215
- "pavili": 201,
216
- "pavilion</w>": 202,
217
- "me": 203,
218
- "tion</w>": 204,
219
- "sum": 205,
220
- "blo": 206,
221
- "bloo": 207,
222
- "blood</w>": 208,
223
- "ser": 209,
224
- "serpen": 210,
225
- "serpent</w>": 211,
226
- "dance</w>": 212,
227
- "ste": 213,
228
- "step</w>": 214,
229
- "pre": 215,
230
- "predi": 216,
231
- "tin": 217,
232
- "tinda": 218,
233
- "tindakan</w>": 219,
234
- "beri": 220,
235
- "beriku": 221,
236
- "berikut": 222,
237
- "berikutn": 223,
238
- "berikutnya</w>": 224,
239
- "tae": 225,
240
- "taeul": 226,
241
- "taeul_": 227,
242
- "taeul_se": 228,
243
- "taeul_sect</w>": 229,
244
- "po": 230,
245
- "pol": 231,
246
- "pola</w>": 232,
247
- "jang</w>": 233,
248
- "hang": 234,
249
- "hangi</w>": 235,
250
- "ad": 236,
251
- "ada</w>": 237,
252
- "bar": 238,
253
- "baru</w>": 239,
254
- "pat": 240,
255
- "patter": 241,
256
- "pattern</w>": 242,
257
- "terpi": 243,
258
- "terpisa": 244,
259
- "terpisah</w>": 245,
260
- "com": 246,
261
- "comp": 247,
262
- "as": 248,
263
- "dete": 249,
264
- "deteksi</w>": 250,
265
- "gu</w>": 251,
266
- "ilm": 252,
267
- "ilmu</w>": 253,
268
- "ketida": 254,
269
- "ketidak": 255,
270
- "ketidakse": 256,
271
- "ketidaksesu": 257,
272
- "ketidaksesuai": 258,
273
- "ketidaksesuaian</w>": 259,
274
- "terk": 260,
275
- "terkai": 261,
276
- "terkait</w>": 262,
277
- "lap": 263,
278
- "lapor": 264,
279
- "laporan</w>": 265,
280
  "hu": 266,
281
  "hubu": 267,
282
- "ela": 268,
283
- "dar": 269,
284
- "dark": 270,
285
- "dark_": 271,
286
- "dark_f": 272,
287
- "dark_fa": 273,
288
- "dark_fac": 274,
289
- "dark_faction</w>": 275,
290
- "at</w>": 276,
291
- "anomaly</w>": 277,
292
- "ban": 278,
293
- "bandi": 279,
294
- "bandingkan</w>": 280,
295
- "tang": 281,
296
- "tangg": 282,
297
- "tanggal</w>": 283,
298
- "hefei": 284,
299
- "hefei_": 285,
300
- "hefei_b": 286,
301
- "hefei_br": 287,
302
- "hefei_branc": 288,
303
- "hefei_branch</w>": 289,
304
- "deng": 290,
305
- "dengan</w>": 291,
306
- "hubungkan</w>": 292,
307
- "fra": 293,
308
- "frag": 294,
309
- "fragme": 295,
310
- "fragmen</w>": 296,
311
- "pencuri</w>": 297,
312
- "compos": 298,
313
- "compose</w>": 299,
314
- "susu": 300,
315
- "susun</w>": 301,
316
- "rec": 302,
317
- "recal": 303,
318
- "recall</w>": 304,
319
- "ing": 305,
320
- "ingat</w>": 306,
321
- "semu": 307,
322
- "semua</w>": 308,
323
- "predict</w>": 309,
324
- "perk": 310,
325
- "perki": 311,
326
- "perkira": 312,
327
- "perkirakan</w>": 313,
328
- "veri": 314,
329
- "verif": 315,
330
- "verify</w>": 316,
331
- "cek</w>": 317,
332
- "konsi": 318,
333
- "konsis": 319,
334
- "konsist": 320,
335
- "konsisten": 321,
336
- "konsistensi</w>": 322,
337
- "konsum": 323,
338
- "konsumsi</w>": 324,
339
- "pa</w>": 325,
340
- "men": 326,
341
- "ting": 327,
342
- "fil": 328,
343
- "filte": 329,
344
- "filter</w>": 330,
345
- "eli": 331,
346
- "elim": 332,
347
- "elimin": 333,
348
- "eliminasi</w>": 334,
349
- "rele": 335,
350
- "relev": 336,
351
- "relevan</w>": 337,
352
- "pil</w>": 338,
353
- "pasa": 339,
354
- "pasar</w>": 340,
355
- "gela": 341,
356
- "gelap</w>": 342,
357
- "suc": 343,
358
- "succe": 344,
359
- "succes": 345,
360
- "success</w>": 346,
361
- "rat": 347,
362
- "rate</w>": 348,
363
- "pai": 349,
364
- "pair</w>": 350,
365
- "lebi": 351,
366
- "lebih</w>": 352,
367
- "tingg": 353,
368
- "tinggi</w>": 354,
369
- "bias": 355,
370
- "biasan": 356,
371
- "biasanya</w>": 357,
372
- "dala": 358,
373
- "dalam</w>": 359,
374
- "ber": 360,
375
- "pencur": 361,
376
- "pencuri": 362,
377
- "pencurian</w>": 363,
378
- "ka</w>": 364,
379
- "tan": 365,
380
- "tanpa</w>": 366,
381
- "je": 367,
382
- "jeja": 368,
383
- "jejak</w>": 369,
384
- "perg": 370,
385
- "perger": 371,
386
- "pergera": 372,
387
- "pergerakan</w>": 373,
388
- "masi</w>": 374,
389
- "inv": 375,
390
- "inve": 376,
391
- "inves": 377,
392
- "investi": 378,
393
- "investig": 379,
394
- "investigasi</w>": 380,
395
- "hari</w>": 381,
396
- "sam": 382,
397
- "sama</w>": 383,
398
- "dat": 384,
399
- "data</w>": 385,
400
- "menu": 386,
401
- "menun": 387,
402
- "menunj": 388,
403
- "menunju": 389,
404
- "menunjuk": 390,
405
- "menunjukkan</w>": 391,
406
- "ca": 392,
407
- "mi": 393,
408
- "misi</w>": 394,
409
- "assi": 395,
410
- "assig": 396,
411
- "assign</w>": 397,
412
- "sen": 398,
413
- "sendi": 399,
414
- "sendiri</w>": 400,
415
- "ka": 401,
416
- "sete": 402,
417
- "setela": 403,
418
- "setelah</w>": 404,
419
- "temu": 405,
420
- "ke</w>": 406,
421
- "sumb": 407,
422
- "sumbe": 408,
423
- "sumber</w>": 409,
424
- "prediksi</w>": 410,
425
- "ters": 411,
426
- "tersang": 412,
427
- "tersangka</w>": 413,
428
- "penal": 414,
429
- "penalar": 415,
430
- "penalaran</w>": 416,
431
- "menja": 417,
432
- "menjadi</w>": 418,
433
- "kun": 419,
434
- "kunc": 420,
435
- "kunci</w>": 421,
436
- "hasi": 422,
437
- "hasil</w>": 423,
438
- "inf": 424,
439
- "infor": 425,
440
- "informasi</w>": 426,
441
- "anomali</w>": 427,
442
- "ya": 428,
443
- "temuan</w>": 429,
444
- "berk": 430,
445
- "berkor": 431,
446
- "berkorela": 432,
447
- "berkorelasi</w>": 433,
448
- "cata": 434,
449
- "catat": 435,
450
- "catatan</w>": 436,
451
- "sia": 437,
452
- "siapa</w>": 438,
453
- "mencu": 439,
454
- "mencuri</w>": 440,
455
- "terse": 441,
456
- "tersedi": 442,
457
- "tersedia</w>": 443,
458
- "mem": 444,
459
- "memili": 445,
460
- "memilik": 446,
461
- "memiliki</w>": 447,
462
- "kone": 448,
463
- "koneksi</w>": 449,
464
- "con": 450,
465
- "confi": 451,
466
- "confid": 452,
467
- "confidence</w>": 453,
468
- "mengin": 454,
469
- "mengindi": 455,
470
- "mengindika": 456,
471
- "mengindikasi": 457,
472
- "mengindikasikan</w>": 458,
473
- "pene": 459,
474
- "penelu": 460,
475
- "penelusu": 461,
476
- "penelusur": 462,
477
- "penelusuran</w>": 463,
478
- "log": 464,
479
- "logi": 465,
480
- "logika</w>": 466,
481
- "pr": 467,
482
- "pro": 468,
483
- "prose": 469,
484
- "proses</w>": 470,
485
- "ded": 471,
486
- "dedu": 472,
487
- "deduksi</w>": 473,
488
- "mengkon": 474,
489
- "mengkonfi": 475,
490
- "mengkonfir": 476,
491
- "mengkonfirmasi</w>": 477,
492
- "comple": 478,
493
- "completion</w>": 479,
494
- "ba": 480,
495
- "bah": 481,
496
- "bahw": 482,
497
- "bahwa</w>": 483,
498
- "ev": 484,
499
- "eval": 485,
500
- "evalu": 486,
501
- "evaluasi</w>": 487,
502
- "keper": 488,
503
- "keperca": 489,
504
- "kepercaya": 490,
505
- "kepercayaan</w>": 491,
506
- "berta": 492,
507
- "bertaha": 493,
508
- "bertahap</w>": 494,
509
- "insi": 495,
510
- "insid": 496,
511
- "inside</w>": 497,
512
- "jo": 498,
513
- "job</w>": 499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
  },
515
  "merges": {
516
  "a|||n": 0,
517
  "a|||n</w>": 1,
518
  "e|||r": 2,
519
- "e|||n": 3,
520
- "d|||a": 4,
521
  "t|||i": 5,
522
  "i|||l": 6,
523
  "s|||i": 7,
524
- "d|||i": 8,
525
- "an|||g</w>": 9,
526
- "s|||i</w>": 10,
527
  "an|||c": 11,
528
- "k|||an</w>": 12,
529
- "a|||l": 13,
530
- "s|||u": 14,
531
- "an|||g": 15,
532
- "r|||i</w>": 16,
533
- "k|||e": 17,
534
- "e|||f": 18,
535
  "t|||er": 19,
536
  "s|||e": 20,
537
- "t|||e": 21,
538
- "p|||a": 22,
539
- "n|||g": 23,
540
- "o|||n</w>": 24,
541
  "o|||n": 25,
542
- "h|||ef": 26,
543
- "hef|||e": 27,
544
- "en|||c": 28,
545
- "o|||r": 29,
546
- "l|||a": 30,
547
- "si|||m": 31,
548
- "u|||l": 32,
549
- "ti|||da": 33,
550
- "a|||r": 34,
551
- "en|||g": 35,
552
- "da|||ri</w>": 36,
553
- "r|||e": 37,
554
- "b|||u": 38,
555
- "anc|||e</w>": 39,
556
- "r|||a": 40,
557
- "o|||m": 41,
558
- "hefe|||i</w>": 42,
559
- "j|||ang": 43,
560
- "s|||a": 44,
561
- "j|||u</w>": 45,
562
- "jang|||m": 46,
563
- "jangm|||o": 47,
564
- "jangmo|||k</w>": 48,
565
- "a|||l</w>": 49,
566
- "o|||s": 50,
567
- "di|||anc": 51,
568
- "dianc|||ang</w>": 52,
569
- "a|||i": 53,
570
- "i|||n": 54,
571
- "j|||a": 55,
572
- "k|||on": 56,
573
- "l|||i": 57,
574
- "c|||t</w>": 58,
575
- "tida|||k</w>": 59,
576
- "er|||i": 60,
577
- "f|||i": 61,
578
- "m|||eng": 62,
579
- "a|||si</w>": 63,
580
- "ke|||sim": 64,
581
- "kesim|||p": 65,
582
- "kesimp|||ul": 66,
583
- "kesimpul|||an</w>": 67,
584
- "d|||i</w>": 68,
585
- "ng|||kan</w>": 69,
586
- "k|||si</w>": 70,
587
- "p|||i": 71,
588
- "y|||a</w>": 72,
589
- "y|||ang</w>": 73,
590
- "enc|||u": 74,
591
- "t|||a": 75,
592
- "bu|||k": 76,
593
- "buk|||t": 77,
594
- "bukt|||i</w>": 78,
595
- "p|||en": 79,
596
- "p|||er": 80,
597
- "l|||u": 81,
598
- "l|||e": 82,
599
- "fi|||v": 83,
600
- "fiv|||e</w>": 84,
601
- "s|||w": 85,
602
- "sw|||or": 86,
603
- "swor|||d": 87,
604
- "sword|||s</w>": 88,
605
- "p|||encu": 89,
606
- "enc|||e</w>": 90,
607
- "c|||e": 91,
608
- "k|||u": 92,
609
- "il|||i": 93,
610
- "s|||n": 94,
611
- "sn|||o": 95,
612
- "sno|||w</w>": 96,
613
- "p|||lu": 97,
614
- "plu|||m</w>": 98,
615
- "p|||il": 99,
616
- "pil|||l</w>": 100,
617
- "meng|||h": 101,
618
- "mengh|||il": 102,
619
- "menghil|||ang</w>": 103,
620
- "l|||o": 104,
621
- "b|||i": 105,
622
- "d|||e": 106,
623
- "an|||om": 107,
624
- "anom|||al": 108,
625
- "m|||ar": 109,
626
- "mar|||ti": 110,
627
- "marti|||al</w>": 111,
628
- "al|||li": 112,
629
- "alli|||ance</w>": 113,
630
- "m|||u": 114,
631
- "an|||al": 115,
632
- "anal|||i": 116,
633
- "anali|||si": 117,
634
- "analisi|||s</w>": 118,
635
- "g|||y": 119,
636
- "gy|||er": 120,
637
- "gyer|||y": 121,
638
- "gyery|||on": 122,
639
- "gyeryon|||g</w>": 123,
640
- "m|||er": 124,
641
- "mer|||c": 125,
642
- "merc|||h": 126,
643
- "merch|||an": 127,
644
- "merchan|||t</w>": 128,
645
- "g|||u": 129,
646
- "gu|||il": 130,
647
- "guil|||d</w>": 131,
648
- "h|||a": 132,
649
- "c|||r": 133,
650
- "cr|||os": 134,
651
- "cros|||s</w>": 135,
652
- "r|||ef": 136,
653
- "ref|||er": 137,
654
- "refer|||ence</w>": 138,
655
- "ke|||ja": 139,
656
- "keja|||di": 140,
657
- "kejadi|||an</w>": 141,
658
- "sim|||h": 142,
659
- "simh|||y": 143,
660
- "simhy|||e": 144,
661
- "simhye|||on</w>": 145,
662
- "pa|||v": 146,
663
- "pav|||ili": 147,
664
- "pavili|||on</w>": 148,
665
- "m|||e": 149,
666
- "ti|||on</w>": 150,
667
- "su|||m": 151,
668
- "b|||lo": 152,
669
- "blo|||o": 153,
670
- "bloo|||d</w>": 154,
671
- "s|||er": 155,
672
- "ser|||pen": 156,
673
- "serpen|||t</w>": 157,
674
- "d|||ance</w>": 158,
675
- "s|||te": 159,
676
- "ste|||p</w>": 160,
677
- "p|||re": 161,
678
- "pre|||di": 162,
679
- "ti|||n": 163,
680
- "tin|||da": 164,
681
- "tinda|||kan</w>": 165,
682
- "b|||eri": 166,
683
- "beri|||ku": 167,
684
- "beriku|||t": 168,
685
- "berikut|||n": 169,
686
- "berikutn|||ya</w>": 170,
687
- "ta|||e": 171,
688
- "tae|||ul": 172,
689
- "taeul|||_": 173,
690
- "taeul_|||se": 174,
691
- "taeul_se|||ct</w>": 175,
692
- "p|||o": 176,
693
- "po|||l": 177,
694
- "pol|||a</w>": 178,
695
- "j|||ang</w>": 179,
696
- "h|||ang": 180,
697
- "hang|||i</w>": 181,
698
- "a|||d": 182,
699
- "ad|||a</w>": 183,
700
- "b|||ar": 184,
701
- "bar|||u</w>": 185,
702
- "pa|||t": 186,
703
- "pat|||ter": 187,
704
- "patter|||n</w>": 188,
705
- "ter|||pi": 189,
706
- "terpi|||sa": 190,
707
- "terpisa|||h</w>": 191,
708
- "c|||om": 192,
709
- "com|||p": 193,
710
- "a|||s": 194,
711
- "de|||te": 195,
712
- "dete|||ksi</w>": 196,
713
- "g|||u</w>": 197,
714
- "il|||m": 198,
715
- "ilm|||u</w>": 199,
716
- "ke|||tida": 200,
717
- "ketida|||k": 201,
718
- "ketidak|||se": 202,
719
- "ketidakse|||su": 203,
720
- "ketidaksesu|||ai": 204,
721
- "ketidaksesuai|||an</w>": 205,
722
- "ter|||k": 206,
723
- "terk|||ai": 207,
724
- "terkai|||t</w>": 208,
725
- "la|||p": 209,
726
- "lap|||or": 210,
727
- "lapor|||an</w>": 211,
728
- "h|||u": 212,
729
- "hu|||bu": 213,
730
- "e|||la": 214,
731
- "da|||r": 215,
732
- "dar|||k": 216,
733
- "dark|||_": 217,
734
- "dark_|||f": 218,
735
- "dark_f|||a": 219,
736
- "dark_fa|||c": 220,
737
- "dark_fac|||tion</w>": 221,
738
- "a|||t</w>": 222,
739
- "anomal|||y</w>": 223,
740
- "b|||an": 224,
741
- "ban|||di": 225,
742
- "bandi|||ngkan</w>": 226,
743
- "t|||ang": 227,
744
- "tang|||g": 228,
745
- "tangg|||al</w>": 229,
746
- "hefe|||i": 230,
747
- "hefei|||_": 231,
748
- "hefei_|||b": 232,
749
- "hefei_b|||r": 233,
750
- "hefei_br|||anc": 234,
751
- "hefei_branc|||h</w>": 235,
752
- "d|||eng": 236,
753
- "deng|||an</w>": 237,
754
- "hubu|||ngkan</w>": 238,
755
- "f|||ra": 239,
756
- "fra|||g": 240,
757
- "frag|||me": 241,
758
- "fragme|||n</w>": 242,
759
- "pencu|||ri</w>": 243,
760
- "comp|||os": 244,
761
- "compos|||e</w>": 245,
762
- "su|||su": 246,
763
- "susu|||n</w>": 247,
764
- "re|||c": 248,
765
- "rec|||al": 249,
766
- "recal|||l</w>": 250,
767
- "i|||ng": 251,
768
- "ing|||at</w>": 252,
769
- "se|||mu": 253,
770
- "semu|||a</w>": 254,
771
- "predi|||ct</w>": 255,
772
- "per|||k": 256,
773
- "perk|||i": 257,
774
- "perki|||ra": 258,
775
- "perkira|||kan</w>": 259,
776
- "v|||eri": 260,
777
- "veri|||f": 261,
778
- "verif|||y</w>": 262,
779
- "ce|||k</w>": 263,
780
- "kon|||si": 264,
781
- "konsi|||s": 265,
782
- "konsis|||t": 266,
783
- "konsist|||en": 267,
784
- "konsisten|||si</w>": 268,
785
- "kon|||sum": 269,
786
- "konsum|||si</w>": 270,
787
- "p|||a</w>": 271,
788
- "m|||en": 272,
789
- "ti|||ng": 273,
790
- "f|||il": 274,
791
- "fil|||te": 275,
792
- "filte|||r</w>": 276,
793
- "e|||li": 277,
794
- "eli|||m": 278,
795
- "elim|||in": 279,
796
- "elimin|||asi</w>": 280,
797
- "re|||le": 281,
798
- "rele|||v": 282,
799
- "relev|||an</w>": 283,
800
- "pi|||l</w>": 284,
801
- "pa|||sa": 285,
802
- "pasa|||r</w>": 286,
803
- "g|||ela": 287,
804
- "gela|||p</w>": 288,
805
- "su|||c": 289,
806
- "suc|||ce": 290,
807
- "succe|||s": 291,
808
- "succes|||s</w>": 292,
809
- "ra|||t": 293,
810
- "rat|||e</w>": 294,
811
- "pa|||i": 295,
812
- "pai|||r</w>": 296,
813
- "le|||bi": 297,
814
- "lebi|||h</w>": 298,
815
- "ting|||g": 299,
816
- "tingg|||i</w>": 300,
817
- "bi|||as": 301,
818
- "bias|||an": 302,
819
- "biasan|||ya</w>": 303,
820
- "da|||la": 304,
821
- "dala|||m</w>": 305,
822
- "b|||er": 306,
823
- "pencu|||r": 307,
824
- "pencur|||i": 308,
825
- "pencuri|||an</w>": 309,
826
- "k|||a</w>": 310,
827
- "t|||an": 311,
828
- "tan|||pa</w>": 312,
829
- "j|||e": 313,
830
- "je|||ja": 314,
831
- "jeja|||k</w>": 315,
832
- "per|||g": 316,
833
- "perg|||er": 317,
834
- "perger|||a": 318,
835
- "pergera|||kan</w>": 319,
836
- "m|||asi</w>": 320,
837
- "in|||v": 321,
838
- "inv|||e": 322,
839
- "inve|||s": 323,
840
- "inves|||ti": 324,
841
- "investi|||g": 325,
842
- "investig|||asi</w>": 326,
843
- "ha|||ri</w>": 327,
844
- "sa|||m": 328,
845
- "sam|||a</w>": 329,
846
- "da|||t": 330,
847
- "dat|||a</w>": 331,
848
- "men|||u": 332,
849
- "menu|||n": 333,
850
- "menun|||j": 334,
851
- "menunj|||u": 335,
852
- "menunju|||k": 336,
853
- "menunjuk|||kan</w>": 337,
854
- "c|||a": 338,
855
- "m|||i": 339,
856
- "mi|||si</w>": 340,
857
- "as|||si": 341,
858
- "assi|||g": 342,
859
- "assig|||n</w>": 343,
860
- "s|||en": 344,
861
- "sen|||di": 345,
862
- "sendi|||ri</w>": 346,
863
- "k|||a": 347,
864
- "se|||te": 348,
865
- "sete|||la": 349,
866
- "setela|||h</w>": 350,
867
- "te|||mu": 351,
868
- "k|||e</w>": 352,
869
- "sum|||b": 353,
870
- "sumb|||e": 354,
871
- "sumbe|||r</w>": 355,
872
- "predi|||ksi</w>": 356,
873
- "ter|||s": 357,
874
- "ters|||ang": 358,
875
- "tersang|||ka</w>": 359,
876
- "pen|||al": 360,
877
- "penal|||ar": 361,
878
- "penalar|||an</w>": 362,
879
- "men|||ja": 363,
880
- "menja|||di</w>": 364,
881
- "ku|||n": 365,
882
- "kun|||c": 366,
883
- "kunc|||i</w>": 367,
884
- "ha|||si": 368,
885
- "hasi|||l</w>": 369,
886
- "in|||f": 370,
887
- "inf|||or": 371,
888
- "infor|||masi</w>": 372,
889
- "anomal|||i</w>": 373,
890
- "y|||a": 374,
891
- "temu|||an</w>": 375,
892
- "ber|||k": 376,
893
- "berk|||or": 377,
894
- "berkor|||ela": 378,
895
- "berkorela|||si</w>": 379,
896
- "ca|||ta": 380,
897
- "cata|||t": 381,
898
- "catat|||an</w>": 382,
899
- "si|||a": 383,
900
- "sia|||pa</w>": 384,
901
- "m|||encu": 385,
902
- "mencu|||ri</w>": 386,
903
- "ter|||se": 387,
904
- "terse|||di": 388,
905
- "tersedi|||a</w>": 389,
906
- "me|||m": 390,
907
- "mem|||ili": 391,
908
- "memili|||k": 392,
909
- "memilik|||i</w>": 393,
910
- "kon|||e": 394,
911
- "kone|||ksi</w>": 395,
912
- "c|||on": 396,
913
- "con|||fi": 397,
914
- "confi|||d": 398,
915
- "confid|||ence</w>": 399,
916
- "meng|||in": 400,
917
- "mengin|||di": 401,
918
- "mengindi|||ka": 402,
919
- "mengindika|||si": 403,
920
- "mengindikasi|||kan</w>": 404,
921
- "pen|||e": 405,
922
- "pene|||lu": 406,
923
- "penelu|||su": 407,
924
- "penelusu|||r": 408,
925
- "penelusur|||an</w>": 409,
926
- "lo|||g": 410,
927
- "log|||i": 411,
928
- "logi|||ka</w>": 412,
929
- "p|||r": 413,
930
- "pr|||o": 414,
931
- "pro|||se": 415,
932
- "prose|||s</w>": 416,
933
- "de|||d": 417,
934
- "ded|||u": 418,
935
- "dedu|||ksi</w>": 419,
936
- "meng|||kon": 420,
937
- "mengkon|||fi": 421,
938
- "mengkonfi|||r": 422,
939
- "mengkonfir|||masi</w>": 423,
940
- "comp|||le": 424,
941
- "comple|||tion</w>": 425,
942
- "b|||a": 426,
943
- "ba|||h": 427,
944
- "bah|||w": 428,
945
- "bahw|||a</w>": 429,
946
- "e|||v": 430,
947
- "ev|||al": 431,
948
- "eval|||u": 432,
949
- "evalu|||asi</w>": 433,
950
- "ke|||per": 434,
951
- "keper|||ca": 435,
952
- "keperca|||ya": 436,
953
- "kepercaya|||an</w>": 437,
954
- "ber|||ta": 438,
955
- "berta|||ha": 439,
956
- "bertaha|||p</w>": 440,
957
- "in|||si": 441,
958
- "insi|||d": 442,
959
- "insid|||e</w>": 443,
960
- "j|||o": 444,
961
- "jo|||b</w>": 445
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
962
  },
963
  "is_trained": true
964
  }
 
1
  {
2
  "config": {
3
+ "bpe_vocab_size": 7987,
4
  "max_sentences": 32,
5
  "sentence_boundary_token": "<sent>",
6
  "pad_token": "<pad>",
 
33
  "2": 19,
34
  "3": 20,
35
  "4": 21,
36
+ "5": 22,
37
+ "6": 23,
38
+ "7": 24,
39
+ "8": 25,
40
+ "9": 26,
41
+ ":": 27,
42
+ ";": 28,
43
+ "?": 29,
44
+ "_": 30,
45
+ "a": 31,
46
+ "b": 32,
47
+ "c": 33,
48
+ "d": 34,
49
+ "e": 35,
50
+ "f": 36,
51
+ "g": 37,
52
+ "h": 38,
53
+ "i": 39,
54
+ "j": 40,
55
+ "k": 41,
56
+ "l": 42,
57
+ "m": 43,
58
+ "n": 44,
59
+ "o": 45,
60
+ "p": 46,
61
+ "r": 47,
62
+ "s": 48,
63
+ "t": 49,
64
+ "u": 50,
65
+ "v": 51,
66
+ "w": 52,
67
+ "y": 53,
68
+ "z": 54,
69
+ "an": 55,
70
+ "an</w>": 56,
71
+ "er": 57,
72
  "da": 58,
73
+ "en": 59,
74
+ "ti": 60,
75
+ "il": 61,
76
+ "si": 62,
77
+ "si</w>": 63,
78
+ "di": 64,
79
+ "ang</w>": 65,
80
+ "anc": 66,
81
  "al": 67,
82
+ "kan</w>": 68,
83
+ "ke": 69,
84
+ "su": 70,
85
+ "ef": 71,
86
+ "ang": 72,
87
+ "ri</w>": 73,
88
+ "ter": 74,
89
+ "se": 75,
90
+ "on</w>": 76,
91
+ "te": 77,
92
+ "hef": 78,
93
+ "hefe": 79,
94
+ "on": 80,
95
+ "ng": 81,
96
+ "po": 82,
97
+ "sim": 83,
98
+ "ul": 84,
99
+ "re": 85,
100
+ "pa": 86,
101
+ "ar": 87,
102
+ "pen": 88,
103
+ "la": 89,
104
  "dari</w>": 90,
105
+ "bu": 91,
106
+ "men": 92,
107
+ "tida": 93,
108
+ "om": 94,
109
+ "hefei</w>": 95,
110
+ "ance</w>": 96,
111
+ "in": 97,
112
+ "asi</w>": 98,
113
+ "ra": 99,
114
+ "kesim": 100,
115
+ "kesimp": 101,
116
+ "kesimpul": 102,
117
+ "kesimpulan</w>": 103,
118
+ "eri": 104,
119
+ "ai": 105,
120
+ "jang": 106,
121
+ "al</w>": 107,
122
+ "ct</w>": 108,
123
+ "kon": 109,
124
+ "dianc": 110,
125
+ "diancang</w>": 111,
126
+ "ju</w>": 112,
127
+ "jangm": 113,
128
+ "jangmo": 114,
129
+ "jangmok</w>": 115,
130
+ "li": 116,
131
+ "or": 117,
132
+ "fi": 118,
133
+ "sa": 119,
134
+ "ta": 120,
135
+ "meng": 121,
136
+ "ksi</w>": 122,
137
  "ngkan</w>": 123,
138
+ "pl": 124,
139
+ "ja": 125,
140
+ "buk": 126,
141
+ "bukt": 127,
142
+ "bukti</w>": 128,
143
+ "tidak</w>": 129,
144
+ "ya</w>": 130,
145
+ "yang</w>": 131,
146
+ "di</w>": 132,
147
+ "per": 133,
148
+ "sn": 134,
149
+ "sno": 135,
150
+ "snow</w>": 136,
151
+ "plu": 137,
152
+ "plum</w>": 138,
153
+ "pil": 139,
154
+ "pill</w>": 140,
155
+ "ss</w>": 141,
156
+ "pi": 142,
157
+ "cu": 143,
158
+ "ku": 144,
159
+ "ili": 145,
160
+ "fiv": 146,
161
+ "five</w>": 147,
162
+ "sw": 148,
163
+ "swor": 149,
164
+ "sword": 150,
165
+ "swords</w>": 151,
166
+ "tion</w>": 152,
167
+ "enc": 153,
168
+ "ence</w>": 154,
169
+ "ran</w>": 155,
170
+ "ce": 156,
171
+ "ro": 157,
172
  "lo": 158,
173
+ "mu": 159,
174
+ "pencu": 160,
175
+ "com": 161,
176
+ "pol": 162,
177
+ "pola</w>": 163,
178
+ "anom": 164,
179
+ "anomal": 165,
180
+ "gy": 166,
181
+ "gyer": 167,
182
+ "gyery": 168,
183
+ "gyeryon": 169,
184
+ "gyeryong</w>": 170,
185
+ "mer": 171,
186
+ "merc": 172,
187
+ "merch": 173,
188
+ "merchan": 174,
189
+ "merchant</w>": 175,
190
+ "gu": 176,
191
+ "guil": 177,
192
+ "guild</w>": 178,
193
+ "de": 179,
194
+ "mar": 180,
195
+ "marti": 181,
196
+ "martial</w>": 182,
197
+ "alli": 183,
198
+ "alliance</w>": 184,
199
+ "pre": 185,
200
+ "predi": 186,
201
+ "tin": 187,
202
+ "tinda": 188,
203
+ "tindakan</w>": 189,
204
+ "beri": 190,
205
+ "beriku": 191,
206
+ "berikut": 192,
207
+ "berikutn": 193,
208
+ "berikutnya</w>": 194,
209
+ "simh": 195,
210
+ "simhy": 196,
211
+ "simhye": 197,
212
+ "simhyeon</w>": 198,
213
+ "pav": 199,
214
+ "pavili": 200,
215
+ "pavilion</w>": 201,
216
+ "jang</w>": 202,
217
+ "hang": 203,
218
+ "hangi</w>": 204,
219
+ "tae": 205,
220
+ "taeul": 206,
221
+ "taeul_": 207,
222
+ "taeul_se": 208,
223
+ "taeul_sect</w>": 209,
224
+ "at</w>": 210,
225
+ "lapo": 211,
226
+ "laporan</w>": 212,
227
+ "gu</w>": 213,
228
+ "ilm": 214,
229
+ "ilmu</w>": 215,
230
+ "terk": 216,
231
+ "terkai": 217,
232
+ "terkait</w>": 218,
233
+ "le": 219,
234
+ "dar": 220,
235
+ "dark": 221,
236
+ "dark_": 222,
237
+ "dark_f": 223,
238
+ "dark_fa": 224,
239
+ "dark_fac": 225,
240
+ "dark_faction</w>": 226,
241
+ "sum": 227,
242
+ "blo": 228,
243
+ "bloo": 229,
244
+ "blood</w>": 230,
245
+ "ser": 231,
246
+ "serpen": 232,
247
+ "serpent</w>": 233,
248
+ "dance</w>": 234,
249
+ "ste": 235,
250
+ "step</w>": 236,
251
+ "anal": 237,
252
+ "anali": 238,
253
+ "analisi": 239,
254
+ "analisis</w>": 240,
255
+ "cro": 241,
256
+ "cross</w>": 242,
257
+ "ref": 243,
258
+ "refer": 244,
259
+ "reference</w>": 245,
260
+ "keja": 246,
261
+ "kejadi": 247,
262
+ "kejadian</w>": 248,
263
+ "mengh": 249,
264
+ "menghil": 250,
265
+ "menghilang</w>": 251,
266
+ "me": 252,
267
+ "ha": 253,
268
+ "pat": 254,
269
+ "patter": 255,
270
+ "pattern</w>": 256,
271
+ "terpi": 257,
272
+ "terpisa": 258,
273
+ "terpisah</w>": 259,
274
+ "hefei": 260,
275
+ "hefei_": 261,
276
+ "hefei_b": 262,
277
+ "hefei_br": 263,
278
+ "hefei_branc": 264,
279
+ "hefei_branch</w>": 265,
280
  "hu": 266,
281
  "hubu": 267,
282
+ "compo": 268,
283
+ "compos": 269,
284
+ "compose</w>": 270,
285
+ "susu": 271,
286
+ "susun</w>": 272,
287
+ "dete": 273,
288
+ "deteksi</w>": 274,
289
+ "ketida": 275,
290
+ "ketidak": 276,
291
+ "ketidakse": 277,
292
+ "ketidaksesu": 278,
293
+ "ketidaksesuai": 279,
294
+ "ketidaksesuaian</w>": 280,
295
+ "rec": 281,
296
+ "recal": 282,
297
+ "recall</w>": 283,
298
+ "ing": 284,
299
+ "ingat</w>": 285,
300
+ "semu": 286,
301
+ "semua</w>": 287,
302
+ "predict</w>": 288,
303
+ "perk": 289,
304
+ "perki": 290,
305
+ "perkira": 291,
306
+ "perkirakan</w>": 292,
307
+ "veri": 293,
308
+ "verif": 294,
309
+ "verify</w>": 295,
310
+ "cek</w>": 296,
311
+ "konsi": 297,
312
+ "konsis": 298,
313
+ "konsist": 299,
314
+ "konsisten": 300,
315
+ "konsistensi</w>": 301,
316
+ "anomaly</w>": 302,
317
+ "ban": 303,
318
+ "bandi": 304,
319
+ "bandingkan</w>": 305,
320
+ "tang": 306,
321
+ "tangg": 307,
322
+ "tanggal</w>": 308,
323
+ "hubungkan</w>": 309,
324
+ "fra": 310,
325
+ "frag": 311,
326
+ "fragme": 312,
327
+ "fragmen</w>": 313,
328
+ "as": 314,
329
+ "den": 315,
330
+ "deng": 316,
331
+ "dengan</w>": 317,
332
+ "ad": 318,
333
+ "ada</w>": 319,
334
+ "bar": 320,
335
+ "baru</w>": 321,
336
+ "bi": 322,
337
+ "fil": 323,
338
+ "filte": 324,
339
+ "filter</w>": 325,
340
+ "eli": 326,
341
+ "elim": 327,
342
+ "elimin": 328,
343
+ "eliminasi</w>": 329,
344
+ "rele": 330,
345
+ "relev": 331,
346
+ "relevan</w>": 332,
347
+ "masi</w>": 333,
348
+ "pencuri</w>": 334,
349
+ "ber": 335,
350
+ "dala": 336,
351
+ "dalam</w>": 337,
352
+ "konsum": 338,
353
+ "konsumsi</w>": 339,
354
+ "ting": 340,
355
+ "pa</w>": 341,
356
+ "pencur": 342,
357
+ "pencuri": 343,
358
+ "pencurian</w>": 344,
359
+ "ca": 345,
360
+ "sumb": 346,
361
+ "sumbe": 347,
362
+ "sumber</w>": 348,
363
+ "inf": 349,
364
+ "infor": 350,
365
+ "informasi</w>": 351,
366
+ "ke</w>": 352,
367
+ "ka</w>": 353,
368
+ "inv": 354,
369
+ "inve": 355,
370
+ "inves": 356,
371
+ "investi": 357,
372
+ "investig": 358,
373
+ "investigasi</w>": 359,
374
+ "ya": 360,
375
+ "mi": 361,
376
+ "misi</w>": 362,
377
+ "assi": 363,
378
+ "assig": 364,
379
+ "assign</w>": 365,
380
+ "sen": 366,
381
+ "sendi": 367,
382
+ "sendiri</w>": 368,
383
+ "ah</w>": 369,
384
+ "menu": 370,
385
+ "menun": 371,
386
+ "menunj": 372,
387
+ "menunju": 373,
388
+ "menunjuk": 374,
389
+ "menunjukkan</w>": 375,
390
+ "dat": 376,
391
+ "data</w>": 377,
392
+ "pil</w>": 378,
393
+ "pasa": 379,
394
+ "pasar</w>": 380,
395
+ "ge": 381,
396
+ "gela": 382,
397
+ "gelap</w>": 383,
398
+ "suc": 384,
399
+ "succe": 385,
400
+ "success</w>": 386,
401
+ "rat": 387,
402
+ "rate</w>": 388,
403
+ "pai": 389,
404
+ "pair</w>": 390,
405
+ "lebi": 391,
406
+ "lebih</w>": 392,
407
+ "tingg": 393,
408
+ "tinggi</w>": 394,
409
+ "bias": 395,
410
+ "biasan": 396,
411
+ "biasanya</w>": 397,
412
+ "penal": 398,
413
+ "penalar": 399,
414
+ "penalaran</w>": 400,
415
+ "sete": 401,
416
+ "setela": 402,
417
+ "setelah</w>": 403,
418
+ "hari</w>": 404,
419
+ "sam": 405,
420
+ "sama</w>": 406,
421
+ "tan": 407,
422
+ "tanpa</w>": 408,
423
+ "je": 409,
424
+ "jeja": 410,
425
+ "jejak</w>": 411,
426
+ "temu": 412,
427
+ "anomali</w>": 413,
428
+ "prediksi</w>": 414,
429
+ "ters": 415,
430
+ "tersang": 416,
431
+ "tersangka</w>": 417,
432
+ "temuan</w>": 418,
433
+ "berk": 419,
434
+ "berko": 420,
435
+ "berkore": 421,
436
+ "berkorela": 422,
437
+ "berkorelasi</w>": 423,
438
+ "cata": 424,
439
+ "catat": 425,
440
+ "catatan</w>": 426,
441
+ "perg": 427,
442
+ "perger": 428,
443
+ "pergera": 429,
444
+ "pergerakan</w>": 430,
445
+ "mengkon": 431,
446
+ "mengkonfi": 432,
447
+ "mengkonfir": 433,
448
+ "mengkonfirmasi</w>": 434,
449
+ "ev": 435,
450
+ "eval": 436,
451
+ "evalu": 437,
452
+ "evaluasi</w>": 438,
453
+ "keper": 439,
454
+ "keperca": 440,
455
+ "kepercaya": 441,
456
+ "kepercayaan</w>": 442,
457
+ "mengar": 443,
458
+ "mengarah</w>": 444,
459
+ "terse": 445,
460
+ "tersedi": 446,
461
+ "tersedia</w>": 447,
462
+ "mem": 448,
463
+ "memili": 449,
464
+ "memilik": 450,
465
+ "memiliki</w>": 451,
466
+ "kone": 452,
467
+ "koneksi</w>": 453,
468
+ "con": 454,
469
+ "confi": 455,
470
+ "confid": 456,
471
+ "confidence</w>": 457,
472
+ "menja": 458,
473
+ "menjadi</w>": 459,
474
+ "kun": 460,
475
+ "kunc": 461,
476
+ "kunci</w>": 462,
477
+ "hasi": 463,
478
+ "hasil</w>": 464,
479
+ "insi": 465,
480
+ "insid": 466,
481
+ "inside</w>": 467,
482
+ "jo": 468,
483
+ "job</w>": 469,
484
+ "compl": 470,
485
+ "comple": 471,
486
+ "completion</w>": 472,
487
+ "ran": 473,
488
+ "ranta": 474,
489
+ "rantai</w>": 475,
490
+ "berda": 476,
491
+ "berdas": 477,
492
+ "berdasar": 478,
493
+ "berdasarkan</w>": 479,
494
+ "terha": 480,
495
+ "terhada": 481,
496
+ "terhadap</w>": 482,
497
+ "tingk": 483,
498
+ "tingkat</w>": 484,
499
+ "keya": 485,
500
+ "keyak": 486,
501
+ "keyakin": 487,
502
+ "keyakinan</w>": 488,
503
+ "pro": 489,
504
+ "prose": 490,
505
+ "proses</w>": 491,
506
+ "ded": 492,
507
+ "dedu": 493,
508
+ "deduksi</w>": 494,
509
+ "ba": 495,
510
+ "bah": 496,
511
+ "bahw": 497,
512
+ "bahwa</w>": 498,
513
+ "sia": 499,
514
+ "siapa</w>": 500,
515
+ "mencu": 501,
516
+ "mencuri</w>": 502,
517
+ "berta": 503,
518
+ "bertaha": 504,
519
+ "bertahap</w>": 505,
520
+ "mengin": 506,
521
+ "mengindi": 507,
522
+ "mengindik": 508,
523
+ "mengindika": 509,
524
+ "mengindikasi": 510,
525
+ "mengindikasikan</w>": 511,
526
+ "pene": 512,
527
+ "penel": 513,
528
+ "penelu": 514,
529
+ "penelusu": 515,
530
+ "penelusuran</w>": 516,
531
+ "log": 517,
532
+ "logi": 518,
533
+ "logika</w>": 519,
534
+ "lang": 520,
535
+ "langk": 521,
536
+ "langkah</w>": 522,
537
+ "hubung": 523,
538
+ "hubungan</w>": 524,
539
+ "ant": 525,
540
+ "antar": 526,
541
+ "antara</w>": 527,
542
+ "dan</w>": 528,
543
+ "terdeteksi</w>": 529,
544
+ "perha": 530,
545
+ "perhati": 531,
546
+ "perhatian</w>": 532,
547
+ "terda": 533,
548
+ "terdapa": 534,
549
+ "terdapat</w>": 535,
550
+ "kejang": 536,
551
+ "kejangg": 537,
552
+ "kejanggal": 538,
553
+ "kejanggalan</w>": 539,
554
+ "laz": 540,
555
+ "lazi": 541,
556
+ "lazim</w>": 542,
557
+ "ditemu": 543,
558
+ "ditemukan</w>": 544,
559
+ "68</w>": 545,
560
+ "62</w>": 546,
561
+ "73</w>": 547,
562
+ "67</w>": 548,
563
+ "74</w>": 549,
564
+ "66</w>": 550,
565
+ "69</w>": 551,
566
+ "70</w>": 552,
567
+ "65</w>": 553,
568
+ "59</w>": 554,
569
+ "71</w>": 555,
570
+ "63</w>": 556,
571
+ "61</w>": 557,
572
+ "64</w>": 558,
573
+ "81</w>": 559,
574
+ "58</w>": 560,
575
+ "54</w>": 561,
576
+ "76</w>": 562,
577
+ "75</w>": 563,
578
+ "46</w>": 564,
579
+ "49</w>": 565,
580
+ "82</w>": 566,
581
+ "57</w>": 567,
582
+ "53</w>": 568,
583
+ "48</w>": 569,
584
+ "52</w>": 570,
585
+ "44</w>": 571,
586
+ "72</w>": 572,
587
+ "56</w>": 573,
588
+ "93</w>": 574,
589
+ "55</w>": 575
590
  },
591
  "merges": {
592
  "a|||n": 0,
593
  "a|||n</w>": 1,
594
  "e|||r": 2,
595
+ "d|||a": 3,
596
+ "e|||n": 4,
597
  "t|||i": 5,
598
  "i|||l": 6,
599
  "s|||i": 7,
600
+ "s|||i</w>": 8,
601
+ "d|||i": 9,
602
+ "an|||g</w>": 10,
603
  "an|||c": 11,
604
+ "a|||l": 12,
605
+ "k|||an</w>": 13,
606
+ "k|||e": 14,
607
+ "s|||u": 15,
608
+ "e|||f": 16,
609
+ "an|||g": 17,
610
+ "r|||i</w>": 18,
611
  "t|||er": 19,
612
  "s|||e": 20,
613
+ "o|||n</w>": 21,
614
+ "t|||e": 22,
615
+ "h|||ef": 23,
616
+ "hef|||e": 24,
617
  "o|||n": 25,
618
+ "n|||g": 26,
619
+ "p|||o": 27,
620
+ "si|||m": 28,
621
+ "u|||l": 29,
622
+ "r|||e": 30,
623
+ "p|||a": 31,
624
+ "a|||r": 32,
625
+ "p|||en": 33,
626
+ "l|||a": 34,
627
+ "da|||ri</w>": 35,
628
+ "b|||u": 36,
629
+ "m|||en": 37,
630
+ "ti|||da": 38,
631
+ "o|||m": 39,
632
+ "hefe|||i</w>": 40,
633
+ "anc|||e</w>": 41,
634
+ "i|||n": 42,
635
+ "a|||si</w>": 43,
636
+ "r|||a": 44,
637
+ "ke|||sim": 45,
638
+ "kesim|||p": 46,
639
+ "kesimp|||ul": 47,
640
+ "kesimpul|||an</w>": 48,
641
+ "er|||i": 49,
642
+ "a|||i": 50,
643
+ "j|||ang": 51,
644
+ "a|||l</w>": 52,
645
+ "c|||t</w>": 53,
646
+ "k|||on": 54,
647
+ "di|||anc": 55,
648
+ "dianc|||ang</w>": 56,
649
+ "j|||u</w>": 57,
650
+ "jang|||m": 58,
651
+ "jangm|||o": 59,
652
+ "jangmo|||k</w>": 60,
653
+ "l|||i": 61,
654
+ "o|||r": 62,
655
+ "f|||i": 63,
656
+ "s|||a": 64,
657
+ "t|||a": 65,
658
+ "men|||g": 66,
659
+ "k|||si</w>": 67,
660
+ "ng|||kan</w>": 68,
661
+ "p|||l": 69,
662
+ "j|||a": 70,
663
+ "bu|||k": 71,
664
+ "buk|||t": 72,
665
+ "bukt|||i</w>": 73,
666
+ "tida|||k</w>": 74,
667
+ "y|||a</w>": 75,
668
+ "y|||ang</w>": 76,
669
+ "d|||i</w>": 77,
670
+ "p|||er": 78,
671
+ "s|||n": 79,
672
+ "sn|||o": 80,
673
+ "sno|||w</w>": 81,
674
+ "pl|||u": 82,
675
+ "plu|||m</w>": 83,
676
+ "p|||il": 84,
677
+ "pil|||l</w>": 85,
678
+ "s|||s</w>": 86,
679
+ "p|||i": 87,
680
+ "c|||u": 88,
681
+ "k|||u": 89,
682
+ "il|||i": 90,
683
+ "fi|||v": 91,
684
+ "fiv|||e</w>": 92,
685
+ "s|||w": 93,
686
+ "sw|||or": 94,
687
+ "swor|||d": 95,
688
+ "sword|||s</w>": 96,
689
+ "ti|||on</w>": 97,
690
+ "en|||c": 98,
691
+ "enc|||e</w>": 99,
692
+ "r|||an</w>": 100,
693
+ "c|||e": 101,
694
+ "r|||o": 102,
695
+ "l|||o": 103,
696
+ "m|||u": 104,
697
+ "pen|||cu": 105,
698
+ "c|||om": 106,
699
+ "po|||l": 107,
700
+ "pol|||a</w>": 108,
701
+ "an|||om": 109,
702
+ "anom|||al": 110,
703
+ "g|||y": 111,
704
+ "gy|||er": 112,
705
+ "gyer|||y": 113,
706
+ "gyery|||on": 114,
707
+ "gyeryon|||g</w>": 115,
708
+ "m|||er": 116,
709
+ "mer|||c": 117,
710
+ "merc|||h": 118,
711
+ "merch|||an": 119,
712
+ "merchan|||t</w>": 120,
713
+ "g|||u": 121,
714
+ "gu|||il": 122,
715
+ "guil|||d</w>": 123,
716
+ "d|||e": 124,
717
+ "m|||ar": 125,
718
+ "mar|||ti": 126,
719
+ "marti|||al</w>": 127,
720
+ "al|||li": 128,
721
+ "alli|||ance</w>": 129,
722
+ "p|||re": 130,
723
+ "pre|||di": 131,
724
+ "ti|||n": 132,
725
+ "tin|||da": 133,
726
+ "tinda|||kan</w>": 134,
727
+ "b|||eri": 135,
728
+ "beri|||ku": 136,
729
+ "beriku|||t": 137,
730
+ "berikut|||n": 138,
731
+ "berikutn|||ya</w>": 139,
732
+ "sim|||h": 140,
733
+ "simh|||y": 141,
734
+ "simhy|||e": 142,
735
+ "simhye|||on</w>": 143,
736
+ "pa|||v": 144,
737
+ "pav|||ili": 145,
738
+ "pavili|||on</w>": 146,
739
+ "j|||ang</w>": 147,
740
+ "h|||ang": 148,
741
+ "hang|||i</w>": 149,
742
+ "ta|||e": 150,
743
+ "tae|||ul": 151,
744
+ "taeul|||_": 152,
745
+ "taeul_|||se": 153,
746
+ "taeul_se|||ct</w>": 154,
747
+ "a|||t</w>": 155,
748
+ "la|||po": 156,
749
+ "lapo|||ran</w>": 157,
750
+ "g|||u</w>": 158,
751
+ "il|||m": 159,
752
+ "ilm|||u</w>": 160,
753
+ "ter|||k": 161,
754
+ "terk|||ai": 162,
755
+ "terkai|||t</w>": 163,
756
+ "l|||e": 164,
757
+ "da|||r": 165,
758
+ "dar|||k": 166,
759
+ "dark|||_": 167,
760
+ "dark_|||f": 168,
761
+ "dark_f|||a": 169,
762
+ "dark_fa|||c": 170,
763
+ "dark_fac|||tion</w>": 171,
764
+ "su|||m": 172,
765
+ "b|||lo": 173,
766
+ "blo|||o": 174,
767
+ "bloo|||d</w>": 175,
768
+ "s|||er": 176,
769
+ "ser|||pen": 177,
770
+ "serpen|||t</w>": 178,
771
+ "d|||ance</w>": 179,
772
+ "s|||te": 180,
773
+ "ste|||p</w>": 181,
774
+ "an|||al": 182,
775
+ "anal|||i": 183,
776
+ "anali|||si": 184,
777
+ "analisi|||s</w>": 185,
778
+ "c|||ro": 186,
779
+ "cro|||ss</w>": 187,
780
+ "r|||ef": 188,
781
+ "ref|||er": 189,
782
+ "refer|||ence</w>": 190,
783
+ "ke|||ja": 191,
784
+ "keja|||di": 192,
785
+ "kejadi|||an</w>": 193,
786
+ "meng|||h": 194,
787
+ "mengh|||il": 195,
788
+ "menghil|||ang</w>": 196,
789
+ "m|||e": 197,
790
+ "h|||a": 198,
791
+ "pa|||t": 199,
792
+ "pat|||ter": 200,
793
+ "patter|||n</w>": 201,
794
+ "ter|||pi": 202,
795
+ "terpi|||sa": 203,
796
+ "terpisa|||h</w>": 204,
797
+ "hefe|||i": 205,
798
+ "hefei|||_": 206,
799
+ "hefei_|||b": 207,
800
+ "hefei_b|||r": 208,
801
+ "hefei_br|||anc": 209,
802
+ "hefei_branc|||h</w>": 210,
803
+ "h|||u": 211,
804
+ "hu|||bu": 212,
805
+ "com|||po": 213,
806
+ "compo|||s": 214,
807
+ "compos|||e</w>": 215,
808
+ "su|||su": 216,
809
+ "susu|||n</w>": 217,
810
+ "de|||te": 218,
811
+ "dete|||ksi</w>": 219,
812
+ "ke|||tida": 220,
813
+ "ketida|||k": 221,
814
+ "ketidak|||se": 222,
815
+ "ketidakse|||su": 223,
816
+ "ketidaksesu|||ai": 224,
817
+ "ketidaksesuai|||an</w>": 225,
818
+ "re|||c": 226,
819
+ "rec|||al": 227,
820
+ "recal|||l</w>": 228,
821
+ "i|||ng": 229,
822
+ "ing|||at</w>": 230,
823
+ "se|||mu": 231,
824
+ "semu|||a</w>": 232,
825
+ "predi|||ct</w>": 233,
826
+ "per|||k": 234,
827
+ "perk|||i": 235,
828
+ "perki|||ra": 236,
829
+ "perkira|||kan</w>": 237,
830
+ "v|||eri": 238,
831
+ "veri|||f": 239,
832
+ "verif|||y</w>": 240,
833
+ "ce|||k</w>": 241,
834
+ "kon|||si": 242,
835
+ "konsi|||s": 243,
836
+ "konsis|||t": 244,
837
+ "konsist|||en": 245,
838
+ "konsisten|||si</w>": 246,
839
+ "anomal|||y</w>": 247,
840
+ "b|||an": 248,
841
+ "ban|||di": 249,
842
+ "bandi|||ngkan</w>": 250,
843
+ "t|||ang": 251,
844
+ "tang|||g": 252,
845
+ "tangg|||al</w>": 253,
846
+ "hubu|||ngkan</w>": 254,
847
+ "f|||ra": 255,
848
+ "fra|||g": 256,
849
+ "frag|||me": 257,
850
+ "fragme|||n</w>": 258,
851
+ "a|||s": 259,
852
+ "d|||en": 260,
853
+ "den|||g": 261,
854
+ "deng|||an</w>": 262,
855
+ "a|||d": 263,
856
+ "ad|||a</w>": 264,
857
+ "b|||ar": 265,
858
+ "bar|||u</w>": 266,
859
+ "b|||i": 267,
860
+ "f|||il": 268,
861
+ "fil|||te": 269,
862
+ "filte|||r</w>": 270,
863
+ "e|||li": 271,
864
+ "eli|||m": 272,
865
+ "elim|||in": 273,
866
+ "elimin|||asi</w>": 274,
867
+ "re|||le": 275,
868
+ "rele|||v": 276,
869
+ "relev|||an</w>": 277,
870
+ "m|||asi</w>": 278,
871
+ "pencu|||ri</w>": 279,
872
+ "b|||er": 280,
873
+ "da|||la": 281,
874
+ "dala|||m</w>": 282,
875
+ "kon|||sum": 283,
876
+ "konsum|||si</w>": 284,
877
+ "ti|||ng": 285,
878
+ "p|||a</w>": 286,
879
+ "pencu|||r": 287,
880
+ "pencur|||i": 288,
881
+ "pencuri|||an</w>": 289,
882
+ "c|||a": 290,
883
+ "sum|||b": 291,
884
+ "sumb|||e": 292,
885
+ "sumbe|||r</w>": 293,
886
+ "in|||f": 294,
887
+ "inf|||or": 295,
888
+ "infor|||masi</w>": 296,
889
+ "k|||e</w>": 297,
890
+ "k|||a</w>": 298,
891
+ "in|||v": 299,
892
+ "inv|||e": 300,
893
+ "inve|||s": 301,
894
+ "inves|||ti": 302,
895
+ "investi|||g": 303,
896
+ "investig|||asi</w>": 304,
897
+ "y|||a": 305,
898
+ "m|||i": 306,
899
+ "mi|||si</w>": 307,
900
+ "as|||si": 308,
901
+ "assi|||g": 309,
902
+ "assig|||n</w>": 310,
903
+ "s|||en": 311,
904
+ "sen|||di": 312,
905
+ "sendi|||ri</w>": 313,
906
+ "a|||h</w>": 314,
907
+ "men|||u": 315,
908
+ "menu|||n": 316,
909
+ "menun|||j": 317,
910
+ "menunj|||u": 318,
911
+ "menunju|||k": 319,
912
+ "menunjuk|||kan</w>": 320,
913
+ "da|||t": 321,
914
+ "dat|||a</w>": 322,
915
+ "pi|||l</w>": 323,
916
+ "pa|||sa": 324,
917
+ "pasa|||r</w>": 325,
918
+ "g|||e": 326,
919
+ "ge|||la": 327,
920
+ "gela|||p</w>": 328,
921
+ "su|||c": 329,
922
+ "suc|||ce": 330,
923
+ "succe|||ss</w>": 331,
924
+ "ra|||t": 332,
925
+ "rat|||e</w>": 333,
926
+ "pa|||i": 334,
927
+ "pai|||r</w>": 335,
928
+ "le|||bi": 336,
929
+ "lebi|||h</w>": 337,
930
+ "ting|||g": 338,
931
+ "tingg|||i</w>": 339,
932
+ "bi|||as": 340,
933
+ "bias|||an": 341,
934
+ "biasan|||ya</w>": 342,
935
+ "pen|||al": 343,
936
+ "penal|||ar": 344,
937
+ "penalar|||an</w>": 345,
938
+ "se|||te": 346,
939
+ "sete|||la": 347,
940
+ "setela|||h</w>": 348,
941
+ "ha|||ri</w>": 349,
942
+ "sa|||m": 350,
943
+ "sam|||a</w>": 351,
944
+ "t|||an": 352,
945
+ "tan|||pa</w>": 353,
946
+ "j|||e": 354,
947
+ "je|||ja": 355,
948
+ "jeja|||k</w>": 356,
949
+ "te|||mu": 357,
950
+ "anomal|||i</w>": 358,
951
+ "predi|||ksi</w>": 359,
952
+ "ter|||s": 360,
953
+ "ters|||ang": 361,
954
+ "tersang|||ka</w>": 362,
955
+ "temu|||an</w>": 363,
956
+ "ber|||k": 364,
957
+ "berk|||o": 365,
958
+ "berko|||re": 366,
959
+ "berkore|||la": 367,
960
+ "berkorela|||si</w>": 368,
961
+ "ca|||ta": 369,
962
+ "cata|||t": 370,
963
+ "catat|||an</w>": 371,
964
+ "per|||g": 372,
965
+ "perg|||er": 373,
966
+ "perger|||a": 374,
967
+ "pergera|||kan</w>": 375,
968
+ "meng|||kon": 376,
969
+ "mengkon|||fi": 377,
970
+ "mengkonfi|||r": 378,
971
+ "mengkonfir|||masi</w>": 379,
972
+ "e|||v": 380,
973
+ "ev|||al": 381,
974
+ "eval|||u": 382,
975
+ "evalu|||asi</w>": 383,
976
+ "ke|||per": 384,
977
+ "keper|||ca": 385,
978
+ "keperca|||ya": 386,
979
+ "kepercaya|||an</w>": 387,
980
+ "meng|||ar": 388,
981
+ "mengar|||ah</w>": 389,
982
+ "ter|||se": 390,
983
+ "terse|||di": 391,
984
+ "tersedi|||a</w>": 392,
985
+ "me|||m": 393,
986
+ "mem|||ili": 394,
987
+ "memili|||k": 395,
988
+ "memilik|||i</w>": 396,
989
+ "kon|||e": 397,
990
+ "kone|||ksi</w>": 398,
991
+ "c|||on": 399,
992
+ "con|||fi": 400,
993
+ "confi|||d": 401,
994
+ "confid|||ence</w>": 402,
995
+ "men|||ja": 403,
996
+ "menja|||di</w>": 404,
997
+ "ku|||n": 405,
998
+ "kun|||c": 406,
999
+ "kunc|||i</w>": 407,
1000
+ "ha|||si": 408,
1001
+ "hasi|||l</w>": 409,
1002
+ "in|||si": 410,
1003
+ "insi|||d": 411,
1004
+ "insid|||e</w>": 412,
1005
+ "j|||o": 413,
1006
+ "jo|||b</w>": 414,
1007
+ "com|||pl": 415,
1008
+ "compl|||e": 416,
1009
+ "comple|||tion</w>": 417,
1010
+ "r|||an": 418,
1011
+ "ran|||ta": 419,
1012
+ "ranta|||i</w>": 420,
1013
+ "ber|||da": 421,
1014
+ "berda|||s": 422,
1015
+ "berdas|||ar": 423,
1016
+ "berdasar|||kan</w>": 424,
1017
+ "ter|||ha": 425,
1018
+ "terha|||da": 426,
1019
+ "terhada|||p</w>": 427,
1020
+ "ting|||k": 428,
1021
+ "tingk|||at</w>": 429,
1022
+ "ke|||ya": 430,
1023
+ "keya|||k": 431,
1024
+ "keyak|||in": 432,
1025
+ "keyakin|||an</w>": 433,
1026
+ "p|||ro": 434,
1027
+ "pro|||se": 435,
1028
+ "prose|||s</w>": 436,
1029
+ "de|||d": 437,
1030
+ "ded|||u": 438,
1031
+ "dedu|||ksi</w>": 439,
1032
+ "b|||a": 440,
1033
+ "ba|||h": 441,
1034
+ "bah|||w": 442,
1035
+ "bahw|||a</w>": 443,
1036
+ "si|||a": 444,
1037
+ "sia|||pa</w>": 445,
1038
+ "men|||cu": 446,
1039
+ "mencu|||ri</w>": 447,
1040
+ "ber|||ta": 448,
1041
+ "berta|||ha": 449,
1042
+ "bertaha|||p</w>": 450,
1043
+ "meng|||in": 451,
1044
+ "mengin|||di": 452,
1045
+ "mengindi|||k": 453,
1046
+ "mengindik|||a": 454,
1047
+ "mengindika|||si": 455,
1048
+ "mengindikasi|||kan</w>": 456,
1049
+ "pen|||e": 457,
1050
+ "pene|||l": 458,
1051
+ "penel|||u": 459,
1052
+ "penelu|||su": 460,
1053
+ "penelusu|||ran</w>": 461,
1054
+ "lo|||g": 462,
1055
+ "log|||i": 463,
1056
+ "logi|||ka</w>": 464,
1057
+ "l|||ang": 465,
1058
+ "lang|||k": 466,
1059
+ "langk|||ah</w>": 467,
1060
+ "hubu|||ng": 468,
1061
+ "hubung|||an</w>": 469,
1062
+ "an|||t": 470,
1063
+ "ant|||ar": 471,
1064
+ "antar|||a</w>": 472,
1065
+ "d|||an</w>": 473,
1066
+ "ter|||deteksi</w>": 474,
1067
+ "per|||ha": 475,
1068
+ "perha|||ti": 476,
1069
+ "perhati|||an</w>": 477,
1070
+ "ter|||da": 478,
1071
+ "terda|||pa": 479,
1072
+ "terdapa|||t</w>": 480,
1073
+ "ke|||jang": 481,
1074
+ "kejang|||g": 482,
1075
+ "kejangg|||al": 483,
1076
+ "kejanggal|||an</w>": 484,
1077
+ "la|||z": 485,
1078
+ "laz|||i": 486,
1079
+ "lazi|||m</w>": 487,
1080
+ "di|||temu": 488,
1081
+ "ditemu|||kan</w>": 489,
1082
+ "6|||8</w>": 490,
1083
+ "6|||2</w>": 491,
1084
+ "7|||3</w>": 492,
1085
+ "6|||7</w>": 493,
1086
+ "7|||4</w>": 494,
1087
+ "6|||6</w>": 495,
1088
+ "6|||9</w>": 496,
1089
+ "7|||0</w>": 497,
1090
+ "6|||5</w>": 498,
1091
+ "5|||9</w>": 499,
1092
+ "7|||1</w>": 500,
1093
+ "6|||3</w>": 501,
1094
+ "6|||1</w>": 502,
1095
+ "6|||4</w>": 503,
1096
+ "8|||1</w>": 504,
1097
+ "5|||8</w>": 505,
1098
+ "5|||4</w>": 506,
1099
+ "7|||6</w>": 507,
1100
+ "7|||5</w>": 508,
1101
+ "4|||6</w>": 509,
1102
+ "4|||9</w>": 510,
1103
+ "8|||2</w>": 511,
1104
+ "5|||7</w>": 512,
1105
+ "5|||3</w>": 513,
1106
+ "4|||8</w>": 514,
1107
+ "5|||2</w>": 515,
1108
+ "4|||4</w>": 516,
1109
+ "7|||2</w>": 517,
1110
+ "5|||6</w>": 518,
1111
+ "9|||3</w>": 519,
1112
+ "5|||5</w>": 520
1113
  },
1114
  "is_trained": true
1115
  }