File size: 81,839 Bytes
3284d90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import transformers\n",
    "\n",
    "model_id = \"./\"\n",
    "tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.pad_token_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "128256"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.audio_token_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "128009"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.eos_token_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PreTrainedTokenizerFast(name_or_path='/mnt/jeff/InCar/LLamaNemotronOmni/NemotronOmni', vocab_size=128000, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>', 'pad_token': '<|eot_id|>', 'additional_special_tokens': ['<audio_soft_token>', '<start_of_audio>', '<end_of_audio>', '']}, clean_up_tokenization_spaces=True, added_tokens_decoder={\n",
       "\t128000: AddedToken(\"<|begin_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128001: AddedToken(\"<|end_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128002: AddedToken(\"<|reserved_special_token_0|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128003: AddedToken(\"<|reserved_special_token_1|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128004: AddedToken(\"<|finetune_right_pad_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128005: AddedToken(\"<|reserved_special_token_2|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128006: AddedToken(\"<|start_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128007: AddedToken(\"<|end_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128008: AddedToken(\"<|eom_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128009: AddedToken(\"<|eot_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128010: AddedToken(\"<|python_tag|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128011: AddedToken(\"<|reserved_special_token_3|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128012: AddedToken(\"<|reserved_special_token_4|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128013: AddedToken(\"<|reserved_special_token_5|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128014: AddedToken(\"<|reserved_special_token_6|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128015: AddedToken(\"<|reserved_special_token_7|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128016: AddedToken(\"<|reserved_special_token_8|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128017: AddedToken(\"<|reserved_special_token_9|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128018: AddedToken(\"<|reserved_special_token_10|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128019: AddedToken(\"<|reserved_special_token_11|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128020: AddedToken(\"<|reserved_special_token_12|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128021: AddedToken(\"<|reserved_special_token_13|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128022: AddedToken(\"<|reserved_special_token_14|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128023: AddedToken(\"<|reserved_special_token_15|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128024: AddedToken(\"<|reserved_special_token_16|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128025: AddedToken(\"<|reserved_special_token_17|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128026: AddedToken(\"<|reserved_special_token_18|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128027: AddedToken(\"<|reserved_special_token_19|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128028: AddedToken(\"<|reserved_special_token_20|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128029: AddedToken(\"<|reserved_special_token_21|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128030: AddedToken(\"<|reserved_special_token_22|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128031: AddedToken(\"<|reserved_special_token_23|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128032: AddedToken(\"<|reserved_special_token_24|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128033: AddedToken(\"<|reserved_special_token_25|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128034: AddedToken(\"<|reserved_special_token_26|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128035: AddedToken(\"<|reserved_special_token_27|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128036: AddedToken(\"<|reserved_special_token_28|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128037: AddedToken(\"<|reserved_special_token_29|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128038: AddedToken(\"<|reserved_special_token_30|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128039: AddedToken(\"<|reserved_special_token_31|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128040: AddedToken(\"<|reserved_special_token_32|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128041: AddedToken(\"<|reserved_special_token_33|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128042: AddedToken(\"<|reserved_special_token_34|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128043: AddedToken(\"<|reserved_special_token_35|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128044: AddedToken(\"<|reserved_special_token_36|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128045: AddedToken(\"<|reserved_special_token_37|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128046: AddedToken(\"<|reserved_special_token_38|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128047: AddedToken(\"<|reserved_special_token_39|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128048: AddedToken(\"<|reserved_special_token_40|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128049: AddedToken(\"<|reserved_special_token_41|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128050: AddedToken(\"<|reserved_special_token_42|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128051: AddedToken(\"<|reserved_special_token_43|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128052: AddedToken(\"<|reserved_special_token_44|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128053: AddedToken(\"<|reserved_special_token_45|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128054: AddedToken(\"<|reserved_special_token_46|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128055: AddedToken(\"<|reserved_special_token_47|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128056: AddedToken(\"<|reserved_special_token_48|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128057: AddedToken(\"<|reserved_special_token_49|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128058: AddedToken(\"<|reserved_special_token_50|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128059: AddedToken(\"<|reserved_special_token_51|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128060: AddedToken(\"<|reserved_special_token_52|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128061: AddedToken(\"<|reserved_special_token_53|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128062: AddedToken(\"<|reserved_special_token_54|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128063: AddedToken(\"<|reserved_special_token_55|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128064: AddedToken(\"<|reserved_special_token_56|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128065: AddedToken(\"<|reserved_special_token_57|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128066: AddedToken(\"<|reserved_special_token_58|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128067: AddedToken(\"<|reserved_special_token_59|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128068: AddedToken(\"<|reserved_special_token_60|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128069: AddedToken(\"<|reserved_special_token_61|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128070: AddedToken(\"<|reserved_special_token_62|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128071: AddedToken(\"<|reserved_special_token_63|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128072: AddedToken(\"<|reserved_special_token_64|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128073: AddedToken(\"<|reserved_special_token_65|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128074: AddedToken(\"<|reserved_special_token_66|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128075: AddedToken(\"<|reserved_special_token_67|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128076: AddedToken(\"<|reserved_special_token_68|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128077: AddedToken(\"<|reserved_special_token_69|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128078: AddedToken(\"<|reserved_special_token_70|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128079: AddedToken(\"<|reserved_special_token_71|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128080: AddedToken(\"<|reserved_special_token_72|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128081: AddedToken(\"<|reserved_special_token_73|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128082: AddedToken(\"<|reserved_special_token_74|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128083: AddedToken(\"<|reserved_special_token_75|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128084: AddedToken(\"<|reserved_special_token_76|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128085: AddedToken(\"<|reserved_special_token_77|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128086: AddedToken(\"<|reserved_special_token_78|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128087: AddedToken(\"<|reserved_special_token_79|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128088: AddedToken(\"<|reserved_special_token_80|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128089: AddedToken(\"<|reserved_special_token_81|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128090: AddedToken(\"<|reserved_special_token_82|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128091: AddedToken(\"<|reserved_special_token_83|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128092: AddedToken(\"<|reserved_special_token_84|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128093: AddedToken(\"<|reserved_special_token_85|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128094: AddedToken(\"<|reserved_special_token_86|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128095: AddedToken(\"<|reserved_special_token_87|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128096: AddedToken(\"<|reserved_special_token_88|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128097: AddedToken(\"<|reserved_special_token_89|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128098: AddedToken(\"<|reserved_special_token_90|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128099: AddedToken(\"<|reserved_special_token_91|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128100: AddedToken(\"<|reserved_special_token_92|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128101: AddedToken(\"<|reserved_special_token_93|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128102: AddedToken(\"<|reserved_special_token_94|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128103: AddedToken(\"<|reserved_special_token_95|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128104: AddedToken(\"<|reserved_special_token_96|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128105: AddedToken(\"<|reserved_special_token_97|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128106: AddedToken(\"<|reserved_special_token_98|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128107: AddedToken(\"<|reserved_special_token_99|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128108: AddedToken(\"<|reserved_special_token_100|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128109: AddedToken(\"<|reserved_special_token_101|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128110: AddedToken(\"<|reserved_special_token_102|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128111: AddedToken(\"<|reserved_special_token_103|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128112: AddedToken(\"<|reserved_special_token_104|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128113: AddedToken(\"<|reserved_special_token_105|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128114: AddedToken(\"<|reserved_special_token_106|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128115: AddedToken(\"<|reserved_special_token_107|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128116: AddedToken(\"<|reserved_special_token_108|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128117: AddedToken(\"<|reserved_special_token_109|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128118: AddedToken(\"<|reserved_special_token_110|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128119: AddedToken(\"<|reserved_special_token_111|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128120: AddedToken(\"<|reserved_special_token_112|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128121: AddedToken(\"<|reserved_special_token_113|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128122: AddedToken(\"<|reserved_special_token_114|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128123: AddedToken(\"<|reserved_special_token_115|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128124: AddedToken(\"<|reserved_special_token_116|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128125: AddedToken(\"<|reserved_special_token_117|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128126: AddedToken(\"<|reserved_special_token_118|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128127: AddedToken(\"<|reserved_special_token_119|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128128: AddedToken(\"<|reserved_special_token_120|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128129: AddedToken(\"<|reserved_special_token_121|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128130: AddedToken(\"<|reserved_special_token_122|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128131: AddedToken(\"<|reserved_special_token_123|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128132: AddedToken(\"<|reserved_special_token_124|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128133: AddedToken(\"<|reserved_special_token_125|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128134: AddedToken(\"<|reserved_special_token_126|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128135: AddedToken(\"<|reserved_special_token_127|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128136: AddedToken(\"<|reserved_special_token_128|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128137: AddedToken(\"<|reserved_special_token_129|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128138: AddedToken(\"<|reserved_special_token_130|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128139: AddedToken(\"<|reserved_special_token_131|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128140: AddedToken(\"<|reserved_special_token_132|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128141: AddedToken(\"<|reserved_special_token_133|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128142: AddedToken(\"<|reserved_special_token_134|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128143: AddedToken(\"<|reserved_special_token_135|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128144: AddedToken(\"<|reserved_special_token_136|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128145: AddedToken(\"<|reserved_special_token_137|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128146: AddedToken(\"<|reserved_special_token_138|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128147: AddedToken(\"<|reserved_special_token_139|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128148: AddedToken(\"<|reserved_special_token_140|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128149: AddedToken(\"<|reserved_special_token_141|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128150: AddedToken(\"<|reserved_special_token_142|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128151: AddedToken(\"<|reserved_special_token_143|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128152: AddedToken(\"<|reserved_special_token_144|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128153: AddedToken(\"<|reserved_special_token_145|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128154: AddedToken(\"<|reserved_special_token_146|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128155: AddedToken(\"<|reserved_special_token_147|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128156: AddedToken(\"<|reserved_special_token_148|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128157: AddedToken(\"<|reserved_special_token_149|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128158: AddedToken(\"<|reserved_special_token_150|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128159: AddedToken(\"<|reserved_special_token_151|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128160: AddedToken(\"<|reserved_special_token_152|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128161: AddedToken(\"<|reserved_special_token_153|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128162: AddedToken(\"<|reserved_special_token_154|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128163: AddedToken(\"<|reserved_special_token_155|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128164: AddedToken(\"<|reserved_special_token_156|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128165: AddedToken(\"<|reserved_special_token_157|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128166: AddedToken(\"<|reserved_special_token_158|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128167: AddedToken(\"<|reserved_special_token_159|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128168: AddedToken(\"<|reserved_special_token_160|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128169: AddedToken(\"<|reserved_special_token_161|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128170: AddedToken(\"<|reserved_special_token_162|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128171: AddedToken(\"<|reserved_special_token_163|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128172: AddedToken(\"<|reserved_special_token_164|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128173: AddedToken(\"<|reserved_special_token_165|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128174: AddedToken(\"<|reserved_special_token_166|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128175: AddedToken(\"<|reserved_special_token_167|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128176: AddedToken(\"<|reserved_special_token_168|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128177: AddedToken(\"<|reserved_special_token_169|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128178: AddedToken(\"<|reserved_special_token_170|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128179: AddedToken(\"<|reserved_special_token_171|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128180: AddedToken(\"<|reserved_special_token_172|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128181: AddedToken(\"<|reserved_special_token_173|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128182: AddedToken(\"<|reserved_special_token_174|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128183: AddedToken(\"<|reserved_special_token_175|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128184: AddedToken(\"<|reserved_special_token_176|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128185: AddedToken(\"<|reserved_special_token_177|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128186: AddedToken(\"<|reserved_special_token_178|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128187: AddedToken(\"<|reserved_special_token_179|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128188: AddedToken(\"<|reserved_special_token_180|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128189: AddedToken(\"<|reserved_special_token_181|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128190: AddedToken(\"<|reserved_special_token_182|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128191: AddedToken(\"<|reserved_special_token_183|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128192: AddedToken(\"<|reserved_special_token_184|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128193: AddedToken(\"<|reserved_special_token_185|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128194: AddedToken(\"<|reserved_special_token_186|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128195: AddedToken(\"<|reserved_special_token_187|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128196: AddedToken(\"<|reserved_special_token_188|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128197: AddedToken(\"<|reserved_special_token_189|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128198: AddedToken(\"<|reserved_special_token_190|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128199: AddedToken(\"<|reserved_special_token_191|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128200: AddedToken(\"<|reserved_special_token_192|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128201: AddedToken(\"<|reserved_special_token_193|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128202: AddedToken(\"<|reserved_special_token_194|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128203: AddedToken(\"<|reserved_special_token_195|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128204: AddedToken(\"<|reserved_special_token_196|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128205: AddedToken(\"<|reserved_special_token_197|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128206: AddedToken(\"<|reserved_special_token_198|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128207: AddedToken(\"<|reserved_special_token_199|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128208: AddedToken(\"<|reserved_special_token_200|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128209: AddedToken(\"<|reserved_special_token_201|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128210: AddedToken(\"<|reserved_special_token_202|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128211: AddedToken(\"<|reserved_special_token_203|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128212: AddedToken(\"<|reserved_special_token_204|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128213: AddedToken(\"<|reserved_special_token_205|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128214: AddedToken(\"<|reserved_special_token_206|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128215: AddedToken(\"<|reserved_special_token_207|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128216: AddedToken(\"<|reserved_special_token_208|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128217: AddedToken(\"<|reserved_special_token_209|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128218: AddedToken(\"<|reserved_special_token_210|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128219: AddedToken(\"<|reserved_special_token_211|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128220: AddedToken(\"<|reserved_special_token_212|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128221: AddedToken(\"<|reserved_special_token_213|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128222: AddedToken(\"<|reserved_special_token_214|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128223: AddedToken(\"<|reserved_special_token_215|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128224: AddedToken(\"<|reserved_special_token_216|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128225: AddedToken(\"<|reserved_special_token_217|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128226: AddedToken(\"<|reserved_special_token_218|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128227: AddedToken(\"<|reserved_special_token_219|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128228: AddedToken(\"<|reserved_special_token_220|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128229: AddedToken(\"<|reserved_special_token_221|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128230: AddedToken(\"<|reserved_special_token_222|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128231: AddedToken(\"<|reserved_special_token_223|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128232: AddedToken(\"<|reserved_special_token_224|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128233: AddedToken(\"<|reserved_special_token_225|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128234: AddedToken(\"<|reserved_special_token_226|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128235: AddedToken(\"<|reserved_special_token_227|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128236: AddedToken(\"<|reserved_special_token_228|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128237: AddedToken(\"<|reserved_special_token_229|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128238: AddedToken(\"<|reserved_special_token_230|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128239: AddedToken(\"<|reserved_special_token_231|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128240: AddedToken(\"<|reserved_special_token_232|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128241: AddedToken(\"<|reserved_special_token_233|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128242: AddedToken(\"<|reserved_special_token_234|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128243: AddedToken(\"<|reserved_special_token_235|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128244: AddedToken(\"<|reserved_special_token_236|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128245: AddedToken(\"<|reserved_special_token_237|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128246: AddedToken(\"<|reserved_special_token_238|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128247: AddedToken(\"<|reserved_special_token_239|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128248: AddedToken(\"<|reserved_special_token_240|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128249: AddedToken(\"<|reserved_special_token_241|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128250: AddedToken(\"<|reserved_special_token_242|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128251: AddedToken(\"<|reserved_special_token_243|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128252: AddedToken(\"<|reserved_special_token_244|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128253: AddedToken(\"<|reserved_special_token_245|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128254: AddedToken(\"<|reserved_special_token_246|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128255: AddedToken(\"<|reserved_special_token_247|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128256: AddedToken(\"<audio_soft_token>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128257: AddedToken(\"<start_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128258: AddedToken(\"<end_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "}\n",
       ")"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.add_special_tokens({'additional_special_tokens':['<audio_soft_token>','<start_of_audio>','<end_of_audio>','']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PreTrainedTokenizerFast(name_or_path='/mnt/jeff/InCar/LLamaNemotronOmni/Llama-3.1-Nemotron-Nano-4B-v1.1', vocab_size=128000, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>', 'pad_token': '<|eot_id|>', 'additional_special_tokens': ['<audio_soft_token>', '<start_of_audio>', '<end_of_audio>', '']}, clean_up_tokenization_spaces=True, added_tokens_decoder={\n",
       "\t128000: AddedToken(\"<|begin_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128001: AddedToken(\"<|end_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128002: AddedToken(\"<|reserved_special_token_0|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128003: AddedToken(\"<|reserved_special_token_1|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128004: AddedToken(\"<|finetune_right_pad_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128005: AddedToken(\"<|reserved_special_token_2|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128006: AddedToken(\"<|start_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128007: AddedToken(\"<|end_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128008: AddedToken(\"<|eom_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128009: AddedToken(\"<|eot_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128010: AddedToken(\"<|python_tag|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128011: AddedToken(\"<|reserved_special_token_3|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128012: AddedToken(\"<|reserved_special_token_4|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128013: AddedToken(\"<|reserved_special_token_5|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128014: AddedToken(\"<|reserved_special_token_6|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128015: AddedToken(\"<|reserved_special_token_7|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128016: AddedToken(\"<|reserved_special_token_8|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128017: AddedToken(\"<|reserved_special_token_9|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128018: AddedToken(\"<|reserved_special_token_10|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128019: AddedToken(\"<|reserved_special_token_11|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128020: AddedToken(\"<|reserved_special_token_12|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128021: AddedToken(\"<|reserved_special_token_13|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128022: AddedToken(\"<|reserved_special_token_14|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128023: AddedToken(\"<|reserved_special_token_15|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128024: AddedToken(\"<|reserved_special_token_16|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128025: AddedToken(\"<|reserved_special_token_17|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128026: AddedToken(\"<|reserved_special_token_18|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128027: AddedToken(\"<|reserved_special_token_19|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128028: AddedToken(\"<|reserved_special_token_20|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128029: AddedToken(\"<|reserved_special_token_21|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128030: AddedToken(\"<|reserved_special_token_22|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128031: AddedToken(\"<|reserved_special_token_23|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128032: AddedToken(\"<|reserved_special_token_24|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128033: AddedToken(\"<|reserved_special_token_25|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128034: AddedToken(\"<|reserved_special_token_26|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128035: AddedToken(\"<|reserved_special_token_27|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128036: AddedToken(\"<|reserved_special_token_28|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128037: AddedToken(\"<|reserved_special_token_29|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128038: AddedToken(\"<|reserved_special_token_30|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128039: AddedToken(\"<|reserved_special_token_31|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128040: AddedToken(\"<|reserved_special_token_32|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128041: AddedToken(\"<|reserved_special_token_33|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128042: AddedToken(\"<|reserved_special_token_34|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128043: AddedToken(\"<|reserved_special_token_35|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128044: AddedToken(\"<|reserved_special_token_36|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128045: AddedToken(\"<|reserved_special_token_37|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128046: AddedToken(\"<|reserved_special_token_38|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128047: AddedToken(\"<|reserved_special_token_39|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128048: AddedToken(\"<|reserved_special_token_40|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128049: AddedToken(\"<|reserved_special_token_41|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128050: AddedToken(\"<|reserved_special_token_42|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128051: AddedToken(\"<|reserved_special_token_43|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128052: AddedToken(\"<|reserved_special_token_44|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128053: AddedToken(\"<|reserved_special_token_45|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128054: AddedToken(\"<|reserved_special_token_46|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128055: AddedToken(\"<|reserved_special_token_47|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128056: AddedToken(\"<|reserved_special_token_48|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128057: AddedToken(\"<|reserved_special_token_49|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128058: AddedToken(\"<|reserved_special_token_50|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128059: AddedToken(\"<|reserved_special_token_51|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128060: AddedToken(\"<|reserved_special_token_52|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128061: AddedToken(\"<|reserved_special_token_53|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128062: AddedToken(\"<|reserved_special_token_54|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128063: AddedToken(\"<|reserved_special_token_55|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128064: AddedToken(\"<|reserved_special_token_56|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128065: AddedToken(\"<|reserved_special_token_57|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128066: AddedToken(\"<|reserved_special_token_58|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128067: AddedToken(\"<|reserved_special_token_59|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128068: AddedToken(\"<|reserved_special_token_60|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128069: AddedToken(\"<|reserved_special_token_61|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128070: AddedToken(\"<|reserved_special_token_62|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128071: AddedToken(\"<|reserved_special_token_63|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128072: AddedToken(\"<|reserved_special_token_64|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128073: AddedToken(\"<|reserved_special_token_65|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128074: AddedToken(\"<|reserved_special_token_66|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128075: AddedToken(\"<|reserved_special_token_67|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128076: AddedToken(\"<|reserved_special_token_68|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128077: AddedToken(\"<|reserved_special_token_69|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128078: AddedToken(\"<|reserved_special_token_70|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128079: AddedToken(\"<|reserved_special_token_71|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128080: AddedToken(\"<|reserved_special_token_72|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128081: AddedToken(\"<|reserved_special_token_73|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128082: AddedToken(\"<|reserved_special_token_74|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128083: AddedToken(\"<|reserved_special_token_75|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128084: AddedToken(\"<|reserved_special_token_76|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128085: AddedToken(\"<|reserved_special_token_77|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128086: AddedToken(\"<|reserved_special_token_78|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128087: AddedToken(\"<|reserved_special_token_79|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128088: AddedToken(\"<|reserved_special_token_80|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128089: AddedToken(\"<|reserved_special_token_81|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128090: AddedToken(\"<|reserved_special_token_82|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128091: AddedToken(\"<|reserved_special_token_83|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128092: AddedToken(\"<|reserved_special_token_84|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128093: AddedToken(\"<|reserved_special_token_85|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128094: AddedToken(\"<|reserved_special_token_86|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128095: AddedToken(\"<|reserved_special_token_87|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128096: AddedToken(\"<|reserved_special_token_88|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128097: AddedToken(\"<|reserved_special_token_89|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128098: AddedToken(\"<|reserved_special_token_90|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128099: AddedToken(\"<|reserved_special_token_91|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128100: AddedToken(\"<|reserved_special_token_92|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128101: AddedToken(\"<|reserved_special_token_93|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128102: AddedToken(\"<|reserved_special_token_94|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128103: AddedToken(\"<|reserved_special_token_95|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128104: AddedToken(\"<|reserved_special_token_96|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128105: AddedToken(\"<|reserved_special_token_97|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128106: AddedToken(\"<|reserved_special_token_98|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128107: AddedToken(\"<|reserved_special_token_99|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128108: AddedToken(\"<|reserved_special_token_100|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128109: AddedToken(\"<|reserved_special_token_101|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128110: AddedToken(\"<|reserved_special_token_102|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128111: AddedToken(\"<|reserved_special_token_103|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128112: AddedToken(\"<|reserved_special_token_104|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128113: AddedToken(\"<|reserved_special_token_105|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128114: AddedToken(\"<|reserved_special_token_106|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128115: AddedToken(\"<|reserved_special_token_107|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128116: AddedToken(\"<|reserved_special_token_108|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128117: AddedToken(\"<|reserved_special_token_109|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128118: AddedToken(\"<|reserved_special_token_110|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128119: AddedToken(\"<|reserved_special_token_111|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128120: AddedToken(\"<|reserved_special_token_112|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128121: AddedToken(\"<|reserved_special_token_113|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128122: AddedToken(\"<|reserved_special_token_114|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128123: AddedToken(\"<|reserved_special_token_115|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128124: AddedToken(\"<|reserved_special_token_116|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128125: AddedToken(\"<|reserved_special_token_117|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128126: AddedToken(\"<|reserved_special_token_118|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128127: AddedToken(\"<|reserved_special_token_119|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128128: AddedToken(\"<|reserved_special_token_120|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128129: AddedToken(\"<|reserved_special_token_121|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128130: AddedToken(\"<|reserved_special_token_122|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128131: AddedToken(\"<|reserved_special_token_123|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128132: AddedToken(\"<|reserved_special_token_124|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128133: AddedToken(\"<|reserved_special_token_125|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128134: AddedToken(\"<|reserved_special_token_126|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128135: AddedToken(\"<|reserved_special_token_127|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128136: AddedToken(\"<|reserved_special_token_128|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128137: AddedToken(\"<|reserved_special_token_129|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128138: AddedToken(\"<|reserved_special_token_130|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128139: AddedToken(\"<|reserved_special_token_131|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128140: AddedToken(\"<|reserved_special_token_132|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128141: AddedToken(\"<|reserved_special_token_133|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128142: AddedToken(\"<|reserved_special_token_134|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128143: AddedToken(\"<|reserved_special_token_135|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128144: AddedToken(\"<|reserved_special_token_136|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128145: AddedToken(\"<|reserved_special_token_137|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128146: AddedToken(\"<|reserved_special_token_138|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128147: AddedToken(\"<|reserved_special_token_139|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128148: AddedToken(\"<|reserved_special_token_140|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128149: AddedToken(\"<|reserved_special_token_141|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128150: AddedToken(\"<|reserved_special_token_142|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128151: AddedToken(\"<|reserved_special_token_143|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128152: AddedToken(\"<|reserved_special_token_144|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128153: AddedToken(\"<|reserved_special_token_145|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128154: AddedToken(\"<|reserved_special_token_146|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128155: AddedToken(\"<|reserved_special_token_147|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128156: AddedToken(\"<|reserved_special_token_148|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128157: AddedToken(\"<|reserved_special_token_149|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128158: AddedToken(\"<|reserved_special_token_150|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128159: AddedToken(\"<|reserved_special_token_151|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128160: AddedToken(\"<|reserved_special_token_152|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128161: AddedToken(\"<|reserved_special_token_153|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128162: AddedToken(\"<|reserved_special_token_154|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128163: AddedToken(\"<|reserved_special_token_155|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128164: AddedToken(\"<|reserved_special_token_156|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128165: AddedToken(\"<|reserved_special_token_157|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128166: AddedToken(\"<|reserved_special_token_158|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128167: AddedToken(\"<|reserved_special_token_159|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128168: AddedToken(\"<|reserved_special_token_160|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128169: AddedToken(\"<|reserved_special_token_161|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128170: AddedToken(\"<|reserved_special_token_162|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128171: AddedToken(\"<|reserved_special_token_163|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128172: AddedToken(\"<|reserved_special_token_164|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128173: AddedToken(\"<|reserved_special_token_165|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128174: AddedToken(\"<|reserved_special_token_166|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128175: AddedToken(\"<|reserved_special_token_167|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128176: AddedToken(\"<|reserved_special_token_168|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128177: AddedToken(\"<|reserved_special_token_169|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128178: AddedToken(\"<|reserved_special_token_170|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128179: AddedToken(\"<|reserved_special_token_171|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128180: AddedToken(\"<|reserved_special_token_172|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128181: AddedToken(\"<|reserved_special_token_173|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128182: AddedToken(\"<|reserved_special_token_174|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128183: AddedToken(\"<|reserved_special_token_175|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128184: AddedToken(\"<|reserved_special_token_176|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128185: AddedToken(\"<|reserved_special_token_177|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128186: AddedToken(\"<|reserved_special_token_178|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128187: AddedToken(\"<|reserved_special_token_179|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128188: AddedToken(\"<|reserved_special_token_180|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128189: AddedToken(\"<|reserved_special_token_181|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128190: AddedToken(\"<|reserved_special_token_182|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128191: AddedToken(\"<|reserved_special_token_183|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128192: AddedToken(\"<|reserved_special_token_184|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128193: AddedToken(\"<|reserved_special_token_185|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128194: AddedToken(\"<|reserved_special_token_186|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128195: AddedToken(\"<|reserved_special_token_187|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128196: AddedToken(\"<|reserved_special_token_188|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128197: AddedToken(\"<|reserved_special_token_189|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128198: AddedToken(\"<|reserved_special_token_190|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128199: AddedToken(\"<|reserved_special_token_191|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128200: AddedToken(\"<|reserved_special_token_192|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128201: AddedToken(\"<|reserved_special_token_193|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128202: AddedToken(\"<|reserved_special_token_194|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128203: AddedToken(\"<|reserved_special_token_195|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128204: AddedToken(\"<|reserved_special_token_196|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128205: AddedToken(\"<|reserved_special_token_197|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128206: AddedToken(\"<|reserved_special_token_198|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128207: AddedToken(\"<|reserved_special_token_199|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128208: AddedToken(\"<|reserved_special_token_200|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128209: AddedToken(\"<|reserved_special_token_201|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128210: AddedToken(\"<|reserved_special_token_202|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128211: AddedToken(\"<|reserved_special_token_203|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128212: AddedToken(\"<|reserved_special_token_204|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128213: AddedToken(\"<|reserved_special_token_205|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128214: AddedToken(\"<|reserved_special_token_206|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128215: AddedToken(\"<|reserved_special_token_207|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128216: AddedToken(\"<|reserved_special_token_208|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128217: AddedToken(\"<|reserved_special_token_209|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128218: AddedToken(\"<|reserved_special_token_210|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128219: AddedToken(\"<|reserved_special_token_211|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128220: AddedToken(\"<|reserved_special_token_212|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128221: AddedToken(\"<|reserved_special_token_213|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128222: AddedToken(\"<|reserved_special_token_214|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128223: AddedToken(\"<|reserved_special_token_215|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128224: AddedToken(\"<|reserved_special_token_216|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128225: AddedToken(\"<|reserved_special_token_217|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128226: AddedToken(\"<|reserved_special_token_218|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128227: AddedToken(\"<|reserved_special_token_219|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128228: AddedToken(\"<|reserved_special_token_220|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128229: AddedToken(\"<|reserved_special_token_221|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128230: AddedToken(\"<|reserved_special_token_222|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128231: AddedToken(\"<|reserved_special_token_223|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128232: AddedToken(\"<|reserved_special_token_224|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128233: AddedToken(\"<|reserved_special_token_225|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128234: AddedToken(\"<|reserved_special_token_226|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128235: AddedToken(\"<|reserved_special_token_227|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128236: AddedToken(\"<|reserved_special_token_228|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128237: AddedToken(\"<|reserved_special_token_229|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128238: AddedToken(\"<|reserved_special_token_230|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128239: AddedToken(\"<|reserved_special_token_231|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128240: AddedToken(\"<|reserved_special_token_232|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128241: AddedToken(\"<|reserved_special_token_233|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128242: AddedToken(\"<|reserved_special_token_234|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128243: AddedToken(\"<|reserved_special_token_235|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128244: AddedToken(\"<|reserved_special_token_236|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128245: AddedToken(\"<|reserved_special_token_237|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128246: AddedToken(\"<|reserved_special_token_238|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128247: AddedToken(\"<|reserved_special_token_239|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128248: AddedToken(\"<|reserved_special_token_240|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128249: AddedToken(\"<|reserved_special_token_241|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128250: AddedToken(\"<|reserved_special_token_242|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128251: AddedToken(\"<|reserved_special_token_243|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128252: AddedToken(\"<|reserved_special_token_244|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128253: AddedToken(\"<|reserved_special_token_245|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128254: AddedToken(\"<|reserved_special_token_246|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128255: AddedToken(\"<|reserved_special_token_247|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128256: AddedToken(\"<audio_soft_token>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128257: AddedToken(\"<start_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "\t128258: AddedToken(\"<end_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
       "}\n",
       ")"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer.audio_token = \"<audio_soft_token>\"\n",
    "tokenizer.boa_token = \"<start_of_audio>\"\n",
    "tokenizer.eoa_token = \"<end_of_audio>\"\n",
    "tokenizer.audio_token_id = 128256"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('../tokenizer/tokenizer_config.json',\n",
       " '../tokenizer/special_tokens_map.json',\n",
       " '../tokenizer/tokenizer.json')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.save_pretrained('../tokenizer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are using a model of type NemotronOmni to instantiate a model of type nemotronOmni. This is not supported for all configurations of models and can yield errors.\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoConfig\n",
    "config = AutoConfig.from_pretrained('/mnt/jeff/InCar/LlamaNemotronOmni/Llama-3.1-NemotronOmni')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'llama'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "config.model_type"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}