jva96160 committed
Commit 3284d90 · verified · 1 Parent(s): b8344d1

Upload 22 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
ASRDataset.py ADDED
@@ -0,0 +1,800 @@
+import datasets
+datasets.config.DOWNLOADED_DATASETS_PATH = "/mnt/jeff/huggingface/data"
+import os
+os.environ['HF_HOME'] = '/mnt/jeff/huggingface'
+
+import json
+import os
+from pathlib import Path
+
+import numpy as np
+import torch
+import sacrebleu
+
+from datasets import load_dataset
+from torch.utils.data import Dataset, ConcatDataset
+from tqdm import tqdm
+from transformers import (
+    BatchFeature,
+)
+import pandas as pd
+import soundfile as sf
+from datasets import Audio
+import random
+from copy import deepcopy
+import torchaudio
+
+ANSWER_SUFFIX = "<|eot_id|>"
+_IGNORE_INDEX = -100
+class BaseAudioDataset(Dataset):
+    def __init__(self, processor, split, sampling_rate=16000, debug=False):
+        self.processor = processor
+        self.training = "train" in split or 'other' in split
+        self.debug = debug
+        self.sampling_rate = sampling_rate
+        self.name = ""
+
+    def set_dataset_name(self, name):
+        self.name = name
+
+    @staticmethod
+    def filter_corrupted_files(data, audio_field, text_fields, dataset_name, sampling_rate=16000, debug=True):
+        original_size = len(data)
+
+        data = data.cast_column(audio_field, Audio(decode=False))
+
+        def identify_corrupted_files(example):
+            try:
+                sf.read(example[audio_field]["path"])
+
+                for field in text_fields:
+                    if field in example and example[field].replace('"', '') == "":
+                        return False
+                return True
+            except Exception:
+                return False
+
+        data = data.filter(identify_corrupted_files, num_proc=16)
+        validated_size = len(data)
+
+        # Audio Decoding
+        data = data.cast_column(audio_field, Audio(sampling_rate=sampling_rate, decode=True))
+
+        if debug:
+            print(f"Dataset: {dataset_name}")
+            print(f"Original data nums: {original_size}")
+            print(f"After filtering data nums: {validated_size}")
+            print(f"Filtering ratio: {validated_size/original_size:.2%}")
+
+        return data
+
+    @staticmethod
+    def filter_by_audio_length(data, audio_field, min_sec=2, max_sec=20, debug=True):
+        original_size = len(data)
+
+        def filter_audio_by_length(example):
+            try:
+                audio = example[audio_field]['array']
+                channel = 1
+                if hasattr(audio, 'ndim') and audio.ndim > 1:
+                    channel = audio.ndim
+                    audio = audio.squeeze()
+                audio_length = len(audio) / example[audio_field]['sampling_rate'] / channel
+                return min_sec <= audio_length <= max_sec
+            except Exception as e:
+                if debug:
+                    print(f"Error : {str(e)[:100]}... - sample excluded")
+                return False
+
+        data = data.filter(filter_audio_by_length, num_proc=16)
+        filtered_size = len(data)
+
+        if debug:
+            print(f"Before Length Filtering data nums: {original_size}")
+            print(f"After Length Filtering data nums: {filtered_size}")
+            print(f"Filtering ratio: {filtered_size/original_size:.2%}")
+
+        return data
+
+    def prepare_model_inputs(self, audio_array, instruction, answer_text):
+        user_message = {
+            'role': 'user',
+            'content': '<start_of_audio>' + instruction,
+        }
+        prompt = self.processor.tokenizer.apply_chat_template(
+            [user_message], tokenize=False, add_generation_prompt=True, add_bos=True
+        )
+
+        inputs = self.processor(
+            text=prompt,
+            audio=[audio_array],
+            return_tensors='pt'
+        )
+
+        answer = f"{answer_text}{ANSWER_SUFFIX}"
+        answer_ids = self.processor.tokenizer(answer, return_tensors='pt').input_ids
+
+        if self.debug:
+            self.debug = False
+            task_type = 'AST' if hasattr(self, 'ast') and self.ast else 'ASR'
+            lang_info = f" - {self.lang}" if hasattr(self, 'lang') else ""
+            print(f"{task_type}{lang_info}\nPROMPT: {prompt}\nINPUT: {self.processor.decode(inputs.input_ids[0], skip_special_tokens=False)}\nANSWER: {self.processor.decode(answer_ids[0], skip_special_tokens=False)}\n")
+            print(f"INPUT_MODE: {inputs.input_modes[0].item()}")
+
+        if self.training:
+            input_ids = torch.cat([inputs.input_ids, answer_ids], dim=1)
+            labels = torch.full_like(input_ids, _IGNORE_INDEX)
+            labels[:, -answer_ids.shape[1]:] = answer_ids
+            padding = torch.zeros((inputs.token_type_ids.shape[0], answer_ids.shape[1]))
+            token_type_ids = torch.cat([inputs.token_type_ids, padding], dim=1)
+        else:
+            input_ids = inputs.input_ids
+            labels = answer_ids
+            token_type_ids = inputs.token_type_ids
+        if self.training:
+            return {
+                'input_ids': input_ids,
+                'labels': labels,
+                'token_type_ids': token_type_ids,
+                'input_audio_embeds': inputs.input_audio_embeds,
+                'audio_embed_sizes': inputs.audio_embed_sizes,
+                'input_modes': inputs.input_modes,
+            }
+        else:
+            return {
+                'input_ids': input_ids,
+                'token_type_ids': token_type_ids,
+                'input_audio_embeds': inputs.input_audio_embeds,
+                'audio_embed_sizes': inputs.audio_embed_sizes,
+                'input_modes': inputs.input_modes,
+                'answer': answer_text,
+            }
+# Libri Speech Dataset Class
+class LibriSpeechDataset(BaseAudioDataset):
+    def __init__(self, processor, subset, split, sampling_rate=16000, debug=False):
+        super().__init__(processor, split, sampling_rate, debug)
+
+        self.set_dataset_name(f"LibriSpeech_{subset}")
+        # only ASR
+        self.ast = False
+        self.lang = "en"
+
+        # load dataset
+        self.data = load_dataset("/mnt/jeff/InCar/data/librispeech_asr",
+                                 subset,
+                                 split=split,
+                                 trust_remote_code=True,
+                                 cache_dir=Path("/mnt/jeff/InCar/data")
+                                 )
+
+        # (Optional) Audio length Filtering
+        self.data = self.filter_by_audio_length(self.data, "audio")
+
+        # Instruction Setting
+        self.instruction = random.choice(INSTRUCTION["asr"])
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data = self.data[idx]
+
+        # Libri Speech is only for ASR
+        answer_text = data["text"].replace('"', '')
+
+        return self.prepare_model_inputs(
+            data["audio"]["array"],
+            random.choice(INSTRUCTION["asr"]),
+            answer_text
+        )
+
+# common_voice_16_1 dataset
+class CommonVoiceDataset(BaseAudioDataset):
+    def __init__(self, processor, split, source_lang, sampling_rate=16000, debug=False):
+        super().__init__(processor, split, sampling_rate, debug)
+
+        self.set_dataset_name(f"CommonVoice_{source_lang}")
+        # only ASR
+        self.ast = False
+        self.lang = source_lang
+
+        # load dataset
+        if source_lang == "zh-TW":
+            data_path = "/mnt/jeff/InCar/data/common_voice_16_1"
+        else:
+            data_path = "/mnt/jeff/InCar/data/common_voice_17_0"
+        self.data = load_dataset(data_path,
+                                 source_lang,
+                                 split=split,
+                                 trust_remote_code=True,
+                                 cache_dir=Path("/mnt/jeff/InCar/data")
+                                 )
+
+        def prepare_dataset(batch):
+            """Function to preprocess the dataset with the .map method"""
+            transcription = batch["sentence"]
+
+            if transcription.startswith('"') and transcription.endswith('"'):
+                # we can remove trailing quotation marks as they do not affect the transcription
+                transcription = transcription[1:-1]
+
+            if transcription[-1] not in [".", "?", "!"]:
+                # append a full-stop to sentences that do not end in punctuation
+                transcription = transcription + "."
+
+            batch["sentence"] = transcription
+
+            return batch
+
+        import opencc
+        converter = opencc.OpenCC('s2tw.json')
+
+        def To_zhTW(batch):
+            transcription = converter.convert(batch["sentence"])
+            batch["sentence"] = transcription
+
+            return batch
+
+        self.data = self.data.map(prepare_dataset, desc="preprocess dataset")
+        if source_lang == 'zh-CN':
+            self.data = self.data.map(To_zhTW, desc="preprocess dataset To_zhTW")
+
+        # (Optional) Audio length Filtering
+        self.data = self.filter_by_audio_length(self.data, "audio")
+
+        if source_lang == "zh-TW" and split == 'train':
+            import torchaudio
+            from torchaudio import transforms
+            import copy
+            import pickle
+            import os
+
+            def subsample(batch):
+                batch['audio']['array'] = torchaudio.functional.resample(torch.FloatTensor(batch['audio']['array']), orig_freq=batch['audio']['sampling_rate'], new_freq=16000)
+                batch['audio']['sampling_rate'] = 16000
+                return batch
+
+            def TW_data_augment_fast(batch):
+                speed_perturb_fast = transforms.SpeedPerturbation(batch['audio']['sampling_rate'], [1.1])
+                new_array_fast = speed_perturb_fast(torch.FloatTensor(batch['audio']['array']))[0]
+                batch['audio']['array'] = new_array_fast
+                return batch
+
+            def TW_data_augment_slow(batch):
+                speed_perturb_slow = transforms.SpeedPerturbation(batch['audio']['sampling_rate'], [0.9])
+                new_array_slow = speed_perturb_slow(torch.FloatTensor(batch['audio']['array']))[0]
+                batch['audio']['array'] = new_array_slow
+                return batch
+
+            # data = self.data.map(subsample, num_proc=1, desc="subsample")
+            fast_path = '/mnt/jeff/InCar/data/tw_fast.pkl'
+            if not os.path.exists(fast_path):
+                data_fast = self.data.map(TW_data_augment_fast, num_proc=1, desc="augment fast")
+                with open(fast_path, 'wb') as f:
+                    pickle.dump(data_fast, f)
+            else:
+                with open(fast_path, 'rb') as f:
+                    data_fast = pickle.load(f)
+
+            slow_path = '/mnt/jeff/InCar/data/data_slow.pkl'
+            if not os.path.exists(slow_path):
+                data_slow = self.data.map(TW_data_augment_slow, num_proc=1, desc="augment slow")
+                with open(slow_path, 'wb') as f:
+                    pickle.dump(data_slow, f)
+            else:
+                with open(slow_path, 'rb') as f:
+                    data_slow = pickle.load(f)
+
+            self.data = [d for d in self.data] + [d for d in data_fast] + [d for d in data_slow]
+
+        # Instruction Setting
+        self.instruction = random.choice(INSTRUCTION["asr"])
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data = self.data[idx]
+
+        answer_text = data["sentence"]
+        return self.prepare_model_inputs(
+            data["audio"]["array"],
+            random.choice(INSTRUCTION["asr"]),
+            answer_text
+        )
+
+
+# Fleurs Dataset Class
+class FleursDataset(BaseAudioDataset):
+    def __init__(self, processor, split, source_lang, target_lang=None,
+                 mode="asr", sampling_rate=16000, debug=False):
+        super().__init__(processor, split, sampling_rate, debug)
+
+        self.set_dataset_name("Fleurs")
+        # Mode Setting (ASR or AST)
+        if mode not in ["asr", "ast"]:
+            raise ValueError("mode must be 'asr' or 'ast'.")
+
+        self.mode = mode
+        self.ast = (mode == "ast")
+        self.source_lang = source_lang
+
+        # Language name mapping (expand if needed)
+        self.lang_names = {
+            'en_us': 'English', 'cmn_hans': 'Mandarin Chinese'
+        }
+
+        # load dataset - source language dataset
+        self.data = load_dataset("/mnt/jeff/InCar/data/fleurs",
+                                 source_lang,
+                                 split=split,
+                                 trust_remote_code=True,
+                                 cache_dir=Path("/mnt/jeff/InCar/data")
+                                 )
+
+        import opencc
+        converter = opencc.OpenCC('s2tw.json')
+
+        def prepare_dataset(batch):
+            transcription = converter.convert(batch["transcription"])
+            batch["transcription"] = transcription
+
+            return batch
+
+        if source_lang == "cmn_hans_cn":
+            self.data = self.data.map(prepare_dataset, desc="preprocess dataset")
+
+        # (Optional) Audio length Filtering
+        self.data = self.filter_by_audio_length(self.data, "audio")
+        self.target_lang_name = ""
+        # When AST mode, load target language dataset.
+        if self.ast:
+            if target_lang is None:
+                raise ValueError("AST mode requires target_lang.")
+
+            self.target_lang = target_lang
+            self.lang = f"{source_lang}_{target_lang}"
+
+            # load dataset - target language dataset (for translation)
+            target_data = load_dataset("/mnt/jeff/InCar/data/fleurs",
+                                       target_lang,
+                                       split=split,
+                                       trust_remote_code=True,
+                                       cache_dir=Path("/mnt/jeff/InCar/data")
+                                       )
+            if target_lang == "cmn_hans_cn":
+                target_data = target_data.map(prepare_dataset, desc="preprocess dataset")
+            source_dict = {item['id']: item for item in self.data}
+            target_dict = {item['id']: item for item in target_data}
+
+            # only Common ID, add translation fields
+            common_ids = set(source_dict.keys()) & set(target_dict.keys())
+            print(f"FLEURS AST Common data filtering: {len(self.data)} -> {len(common_ids)}")
+            self.data = [
+                {**source_dict[id], 'translation': target_dict[id]['transcription']}
+                for id in common_ids
+            ]
+
+            # Instruction Setting - use target language name
+            self.target_lang_name = self.lang_names.get(target_lang, target_lang.capitalize())
+            self.instruction = random.choice(INSTRUCTION["ast"])
+        else:
+            # ASR mode
+            self.lang = source_lang
+            self.instruction = random.choice(INSTRUCTION["asr"])
+
+        if self.debug:
+            print(f"FLEURS dataset loaded: {self.mode.upper()} mode")
+            print(f"source lang: {source_lang} ({self.lang_names.get(source_lang, source_lang)})")
+            if self.ast:
+                print(f"target lang: {target_lang} ({self.lang_names.get(target_lang, target_lang)})")
+            print(f"dataset size: {len(self.data)}")
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data = self.data[idx]
+        audio_array = data["audio"]["array"]
+
+        if self.ast:
+            answer_text = data["translation"]
+            instruction = random.choice(INSTRUCTION["ast"]).format(self.target_lang_name)
+        else:
+            answer_text = data["transcription"]
+            instruction = random.choice(INSTRUCTION["asr"])
+
+        return self.prepare_model_inputs(
+            audio_array,
+            instruction,
+            answer_text
+        )
+
+class TWCostumData(BaseAudioDataset):
+
+    def __init__(self, processor, split="train", sampling_rate=16000, csv_path="", debug=False):
+        super().__init__(processor, split, sampling_rate, debug)
+        import pandas as pd
+        from datasets import Dataset, Audio
+
+        df = pd.read_csv(csv_path).fillna('')
+
+        self.set_dataset_name("TWCostumData")
+        self.data = Dataset.from_dict(
+            {
+                "audio": [audio for audio in df['audio']],
+                "sentence": [text for text in df['text']]
+            }
+        ).cast_column("audio", Audio(sampling_rate=16000))
+
+        # Instruction Setting
+        self.instruction = random.choice(INSTRUCTION["asr"])
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data = self.data[idx]
+
+        answer_text = data["sentence"]
+        return self.prepare_model_inputs(
+            data["audio"]["array"],
+            random.choice(INSTRUCTION["asr"]),
+            answer_text
+        )
+def covost_collate_fn(batch):
+    input_ids_list = []
+    labels_list = []
+    token_type_ids_list = []
+    input_audio_embeds_list = []
+    audio_embed_sizes_list = []
+    audio_attention_mask_list = []
+    input_modes_list = []
+    audio_paths = []
+    for inputs in batch:
+        if 'audio_path' in inputs:
+            audio_paths.append(inputs['audio_path'])
+        input_ids_list.append(inputs['input_ids'][0])
+        labels_list.append(inputs['labels'][0])
+        token_type_ids_list.append(inputs['token_type_ids'][0])
+        if inputs['input_modes'] == 2:
+            input_audio_embeds_list.append(inputs['input_audio_embeds'])
+            audio_embed_sizes_list.append(inputs['audio_embed_sizes'])
+            audio_attention_mask_list.append(
+                inputs['input_audio_embeds'].new_full((inputs['input_audio_embeds'].size(1),), True, dtype=torch.bool)
+            )
+        # else:
+        #     input_audio_embeds_list.append(None)
+        #     audio_embed_sizes_list.append(None)
+        #     audio_attention_mask_list.append(None)
+        input_modes_list.append(inputs['input_modes'])
+    # try:
+    token_type_ids = pad_sequence(token_type_ids_list, padding_side='left', padding_value=0)
+    input_ids = pad_sequence(input_ids_list, padding_side='left', padding_value=0)
+    labels = pad_sequence(labels_list, padding_side='left', padding_value=0)
+    audio_attention_mask = (
+        pad_sequence(audio_attention_mask_list, padding_side='left', padding_value=False)
+        if len(audio_attention_mask_list) > 1
+        else None
+    )
+    # except Exception as e:
+    #     print(e)
+    #     print(input_ids_list)
+    #     print(labels_list)
+    #     raise
+    attention_mask = (input_ids != 0).long()
+    input_audio_embeds = cat_with_pad(input_audio_embeds_list, dim=0) if len(input_audio_embeds_list) > 0 else None
+    audio_embed_sizes = torch.cat(audio_embed_sizes_list) if len(audio_embed_sizes_list) > 0 else None
+    input_modes = torch.cat(input_modes_list)
+    if len(audio_paths) > 0:
+        return BatchFeature(
+            {
+                "audio_path": audio_paths,
+                'input_ids': input_ids,
+                'labels': labels,
+                'token_type_ids': token_type_ids,
+                'attention_mask': attention_mask,
+                'input_audio_embeds': input_audio_embeds,
+                'audio_embed_sizes': audio_embed_sizes,
+                'audio_attention_mask': audio_attention_mask,
+                'input_modes': input_modes,
+            }
+        )
+    else:
+        return BatchFeature(
+            {
+                'input_ids': input_ids,
+                'labels': labels,
+                'token_type_ids': token_type_ids,
+                'attention_mask': attention_mask,
+                'input_audio_embeds': input_audio_embeds,
+                'audio_embed_sizes': audio_embed_sizes,
+                'audio_attention_mask': audio_attention_mask,
+                'input_modes': input_modes,
+            }
+        )
+
+def pad_sequence(sequences, padding_side='left', padding_value=0):
+    """
+    Pad a list of sequences to the same length.
+    sequences: list of tensors in [seq_len, *] shape
+    """
+    assert padding_side in ['right', 'left']
+    max_size = sequences[0].size()
+    trailing_dims = max_size[1:]
+    max_len = max(len(seq) for seq in sequences)
+    batch_size = len(sequences)
+    output = sequences[0].new_full((batch_size, max_len) + trailing_dims, padding_value)
+    for i, seq in enumerate(sequences):
+        length = seq.size(0)
+        if padding_side == 'right':
+            output.data[i, :length] = seq
+        else:
+            output.data[i, -length:] = seq
+    return output
+
+def cat_with_pad(tensors, dim, padding_value=0):
+    """
+    cat along dim, while pad to max for all other dims
+    """
+    ndim = tensors[0].dim()
+    assert all(
+        t.dim() == ndim for t in tensors[1:]
+    ), 'All tensors must have the same number of dimensions'
+
+    out_size = [max(t.shape[i] for t in tensors) for i in range(ndim)]
+    out_size[dim] = sum(t.shape[dim] for t in tensors)
+    output = tensors[0].new_full(out_size, padding_value)
+
+    index = 0
+    for t in tensors:
+        # Create a slice list where every dimension except dim is full slice
+        slices = [slice(0, t.shape[d]) for d in range(ndim)]
+        # Update only the concat dimension slice
+        slices[dim] = slice(index, index + t.shape[dim])
+
+        output[slices] = t
+        index += t.shape[dim]
+
+    return output
+
+
+
+class MultiturnAudioDataset(BaseAudioDataset):
+    def __init__(self, processor, split="train", sampling_rate=16000, json_path="", text_only=False, debug=False):
+        super().__init__(processor, split, sampling_rate, debug)
+        from llamafactory.data.template import Llama2Template, parse_template
+        from llamafactory.data.formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter
+        from llamafactory.data.mm_plugin import get_mm_plugin
+        import json
+        self.train = False
+        self.text_only = text_only
+        with open(json_path) as f:
+            js_data = json.load(f)
+        if split == 'train':
+            self.train = True
+            js_data = js_data[:int(len(js_data)*0.8)]
+        else:
+            js_data = js_data[-int(len(js_data)*0.2):]
+        for conv in js_data:
+            for mess in conv['conversations']:
+                if 'audio_path' in mess:
+                    mess['audio_path'] = mess['audio_path'].replace('/home/jeff/codes/llm/InCar/srdc_generate_tts/', '/mnt/jeff/InCar/data/multiturn_data/')
+        default_system = ""  # "You are a helpful assistant that determines how to solve problems based on user needs and converts user speech into text.\n"
+        self.template = Llama2Template(
+            format_user=StringFormatter(slots=["<|begin_of_text|>user\n{{content}}<|end_of_text|>\n<|begin_of_text|>model\n"]),
+            format_assistant=StringFormatter(slots=["{{content}}<end_of_turn>\n"]),
+            format_system=StringFormatter(slots=["{{content}}\n\n"]),
+            format_function=FunctionFormatter(slots=["{{content}}", {"eos_token"}], tool_format="default"),
+            format_tools=ToolFormatter(tool_format="default"),
+            format_observation=StringFormatter(
+                slots=["<|begin_of_text|>tool\n{{content}}<|end_of_text|>\n<|begin_of_text|>model\n"]
+            ),
+            default_system=default_system,
+            thought_words=("<think>", "</think>"),
+            efficient_eos=False,
+            replace_eos=False,
+            replace_jinja_template=False,
+            format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
+            stop_words=["<|end_of_text|>"],
+            mm_plugin=get_mm_plugin(name="base"),
+            enable_thinking=False
+        )
+
+        self.set_dataset_name("MultiturnCostumData")
+
+
+        self.data = []
+        self.text_only_data = []
+        for conv in js_data:
+            tools = conv['tools'] if 'tools' in conv else ""
+            system = conv['system'] if 'system' in conv else default_system
+            tmp = {
+                'tools': tools,
+                'system': system,
+                'messages': [],
+            }
+            for i, mess in enumerate(conv['conversations']):
+                tmp['messages'].append(mess)
+                if mess['from'] == 'human':
+                    tmp['messages'].append(conv['conversations'][i+1])
+                    d = deepcopy(tmp)
+                    d['audio_array'] = torchaudio.load(mess['audio_path'])[0][0]
+                    self.data.append(d)
+                    if self.text_only:
+                        self.text_only_data.append(deepcopy(tmp))
+                    tmp['messages'].pop()
+                elif mess['from'] == 'observation':
+                    tmp['messages'].append(conv['conversations'][i+1])
+                    d = deepcopy(tmp)
+                    self.text_only_data.append(d)
+                    tmp['messages'].pop()
+        if text_only:
+            self.data = self.text_only_data
+
+
+    def prepare_multiturn_model_inputs(self, audio_array, messages, system="", tools=""):
+        prompt = ""
+        answer_text = ""
+        user_transcribe = ""
+        audio_paths = []
+        for i, message in enumerate(messages):
+            elements = []
+
+            system_text = ""
+            if i == 0:
+                elements += self.template.format_prefix.apply()
+                if system or tools:
+                    tool_text = self.template.format_tools.apply(content=tools)[0] if tools else ""
+                    system_text = self.template.format_system.apply(content=(system + tool_text))[0]
+
+            if message["from"] == "human":
+                if i == len(messages) - 2 and not self.text_only:
+                    user_transcribe = message["value"]
+                    elements += self.template.format_user.apply(content=system_text + '<start_of_audio>')
+                else:
+                    elements += self.template.format_user.apply(content=system_text + message["value"])
+                audio_paths.append(message['audio_path'])
+            elif message["from"] == "gpt":
+                elements += self.template.format_assistant.apply(content=message["value"])
+            elif message["from"] == "observation":
+                elements += self.template.format_observation.apply(content=message["value"])
+            elif message["from"] == "function_call":
+                elements += self.template.format_function.apply(content=message["value"])
+            else:
+                raise NotImplementedError("Unexpected role: {}".format(message["from"]))
+
+
+            for elem in elements:
+                ele_str = ""
+                if isinstance(elem, str):
+                    ele_str = elem
+                elif isinstance(elem, set):
+                    if "bos_token" in elem and self.processor.tokenizer.bos_token_id is not None:
+                        ele_str = self.processor.tokenizer.bos_token
+                    elif "eos_token" in elem and self.processor.tokenizer.eos_token_id is not None:
+                        ele_str = self.processor.tokenizer.eos_token
+                if i == len(messages) - 1:
+                    answer_text += ele_str
+                else:
+                    prompt += ele_str
+
+
+        if audio_array is not None:
+            inputs = self.processor(
+                text=prompt,
+                audio=[audio_array],
+                return_tensors='pt'
+            )
+            answer = "\nUser transcribe is : {};\nGPT output is : {}{}".format(user_transcribe, answer_text, ANSWER_SUFFIX)
+        else:
+            inputs = self.processor(
+                text=prompt,
+                audio=None,
+                return_tensors='pt'
+            )
+            answer = f"{answer_text}{ANSWER_SUFFIX}"
+        # print('user_transcribe', user_transcribe)
+        # print('answer_text', answer)
+        # print('prompt', prompt)
+        answer_ids = self.processor.tokenizer(answer, return_tensors='pt').input_ids
+
+        if self.debug:
+            self.debug = False
+            task_type = 'AST' if hasattr(self, 'ast') and self.ast else 'ASR'
+            lang_info = f" - {self.lang}" if hasattr(self, 'lang') else ""
+            print(f"{task_type}{lang_info}\nPROMPT: {prompt}\nINPUT: {self.processor.decode(inputs.input_ids[0], skip_special_tokens=False)}\nANSWER: {self.processor.decode(answer_ids[0], skip_special_tokens=False)}\n")
+            print(f"INPUT_MODE: {inputs.input_modes[0].item()}")
+
+        if self.training:
+            input_ids = torch.cat([inputs.input_ids, answer_ids], dim=1)
+            labels = torch.full_like(input_ids, _IGNORE_INDEX)
+            labels[:, -answer_ids.shape[1]:] = answer_ids
+            padding = torch.zeros((inputs.token_type_ids.shape[0], answer_ids.shape[1]))
+            token_type_ids = torch.cat([inputs.token_type_ids, padding], dim=1)
+        else:
+            input_ids = inputs.input_ids
+            labels = answer_ids
+            token_type_ids = inputs.token_type_ids
+        if audio_array is not None:
+            if not self.train:
+                return {
+                    "audio_path": audio_paths,
+                    'input_ids': input_ids,
+                    'labels': labels,
+                    'token_type_ids': token_type_ids,
+                    'input_audio_embeds': inputs.input_audio_embeds,
+                    'audio_embed_sizes': inputs.audio_embed_sizes,
+                    'input_modes': inputs.input_modes,
+                }
+            else:
+                return {
+                    'input_ids': input_ids,
+                    'labels': labels,
+                    'token_type_ids': token_type_ids,
+                    'input_audio_embeds': inputs.input_audio_embeds,
+                    'audio_embed_sizes': inputs.audio_embed_sizes,
+                    'input_modes': inputs.input_modes,
+                }
+        else:
+            return {
+                'input_ids': input_ids,
+                'labels': labels,
+                'token_type_ids': token_type_ids,
+                'input_audio_embeds': None,
+                'audio_embed_sizes': None,
+                'input_modes': inputs.input_modes,
+            }
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data = self.data[idx]
+        return self.prepare_multiturn_model_inputs(
+            audio_array=data["audio_array"] if "audio_array" in data else None,
+            messages=data['messages'],
+            system=data["system"],
+            tools=data["tools"]
+        )
+
+
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+INSTRUCTION = {
+    "ast": [
+        "Translate the audio to {0}.",
+        "Translate the audio clip into {0}.",
+        "Based on the attached audio, generate a comprehensive {0} translation of the spoken content.",
+        "Translate the provided audio file into {0}.",
+        "Convert the audio speech to {0} text.",
+        "Write an {0} translation of the audio file.",
+        "Translate spoken words from the audio into {0}.",
+        "Create an {0} version of the audio content.",
+        "Produce an accurate {0} translation of the audio.",
+        "Extract speech from the audio and translate it to {0}.",
+        "Turn the audio into readable {0} text.",
+        "Write all spoken content from the audio in {0}.",
+        "Generate an {0} translation of the speech in the file.",
+        "Convert the recording into {0} text.",
+        "Accurately translate the audio recording to {0}.",
+        "Write down dialogue from the given audio in {0}.",
+        "Translate all speech in this audio file to {0}.",
+        "Create an accurate {0} version of the speech.",
+        "Perform a complete {0} translation of the audio."
+    ],
+    "asr": [
+        "Transcribe the audio clip into text.",
+        "Based on the attached audio, generate a comprehensive text transcription of the spoken content.",
+        "Transcribe the provided audio file into text.",
+        "Convert the audio speech to text.",
+        "Write a transcript of the audio file.",
+        "Transcribe spoken words from the audio.",
+        "Create a text version of the audio content.",
+        "Produce a verbatim transcript of the audio.",
+        "Extract and transcribe speech from the audio.",
+        "Turn the audio into readable text.",
+        "Write all spoken words from the audio.",
+        "Generate a transcript of the speech in the file.",
+        "Convert the recording into a text transcript.",
+        "Accurately transcribe the audio recording.",
+        "Write down dialogue from the given audio.",
+        "Transcribe all speech in this audio file.",
+        "Create an accurate text version of the speech.",
+        "Perform a complete transcription of the audio."
+    ],
+}
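
Note: ASRDataset.py only defines datasets, collate functions, and padding helpers; this upload does not show how they are wired together. A minimal usage sketch follows for orientation. The processor checkpoint path, the LibriSpeech subset/split names, and the DataLoader settings are illustrative assumptions, not values taken from this commit.

    # Illustrative wiring of the classes above into a training DataLoader.
    # All paths and hyperparameters below are assumptions, not from this repo.
    from torch.utils.data import DataLoader, ConcatDataset
    from transformers import AutoProcessor
    from ASRDataset import LibriSpeechDataset, CommonVoiceDataset, covost_collate_fn

    processor = AutoProcessor.from_pretrained("./", trust_remote_code=True)  # assumed local checkpoint

    train_set = ConcatDataset([
        LibriSpeechDataset(processor, subset="clean", split="train.100"),    # assumed subset/split names
        CommonVoiceDataset(processor, split="train", source_lang="zh-TW"),
    ])

    # covost_collate_fn left-pads input_ids / labels / token_type_ids and merges the
    # per-sample audio features into a single BatchFeature.
    loader = DataLoader(train_set, batch_size=4, shuffle=True, collate_fn=covost_collate_fn)
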
add_token.ipynb ADDED
@@ -0,0 +1,748 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import transformers\n",
+    "\n",
+    "model_id = \"./\"\n",
+    "tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer.pad_token_id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "128256"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenizer.audio_token_id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "128009"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenizer.eos_token_id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "PreTrainedTokenizerFast(name_or_path='/mnt/jeff/InCar/LLamaNemotronOmni/NemotronOmni', vocab_size=128000, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>', 'pad_token': '<|eot_id|>', 'additional_special_tokens': ['<audio_soft_token>', '<start_of_audio>', '<end_of_audio>', '']}, clean_up_tokenization_spaces=True, added_tokens_decoder={\n",
+       "\t128000: AddedToken(\"<|begin_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128001: AddedToken(\"<|end_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128002: AddedToken(\"<|reserved_special_token_0|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128003: AddedToken(\"<|reserved_special_token_1|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128004: AddedToken(\"<|finetune_right_pad_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128005: AddedToken(\"<|reserved_special_token_2|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128006: AddedToken(\"<|start_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128007: AddedToken(\"<|end_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128008: AddedToken(\"<|eom_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128009: AddedToken(\"<|eot_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128010: AddedToken(\"<|python_tag|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128011: AddedToken(\"<|reserved_special_token_3|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128012: AddedToken(\"<|reserved_special_token_4|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128013: AddedToken(\"<|reserved_special_token_5|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128014: AddedToken(\"<|reserved_special_token_6|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128015: AddedToken(\"<|reserved_special_token_7|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128016: AddedToken(\"<|reserved_special_token_8|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128017: AddedToken(\"<|reserved_special_token_9|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128018: AddedToken(\"<|reserved_special_token_10|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128019: AddedToken(\"<|reserved_special_token_11|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128020: AddedToken(\"<|reserved_special_token_12|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128021: AddedToken(\"<|reserved_special_token_13|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128022: AddedToken(\"<|reserved_special_token_14|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128023: AddedToken(\"<|reserved_special_token_15|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128024: AddedToken(\"<|reserved_special_token_16|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128025: AddedToken(\"<|reserved_special_token_17|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128026: AddedToken(\"<|reserved_special_token_18|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128027: AddedToken(\"<|reserved_special_token_19|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128028: AddedToken(\"<|reserved_special_token_20|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128029: AddedToken(\"<|reserved_special_token_21|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128030: AddedToken(\"<|reserved_special_token_22|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128031: AddedToken(\"<|reserved_special_token_23|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128032: AddedToken(\"<|reserved_special_token_24|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128033: AddedToken(\"<|reserved_special_token_25|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128034: AddedToken(\"<|reserved_special_token_26|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128035: AddedToken(\"<|reserved_special_token_27|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128036: AddedToken(\"<|reserved_special_token_28|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128037: AddedToken(\"<|reserved_special_token_29|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128038: AddedToken(\"<|reserved_special_token_30|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128039: AddedToken(\"<|reserved_special_token_31|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128040: AddedToken(\"<|reserved_special_token_32|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128041: AddedToken(\"<|reserved_special_token_33|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128042: AddedToken(\"<|reserved_special_token_34|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128043: AddedToken(\"<|reserved_special_token_35|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128044: AddedToken(\"<|reserved_special_token_36|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128045: AddedToken(\"<|reserved_special_token_37|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128046: AddedToken(\"<|reserved_special_token_38|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128047: AddedToken(\"<|reserved_special_token_39|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128048: AddedToken(\"<|reserved_special_token_40|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128049: AddedToken(\"<|reserved_special_token_41|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128050: AddedToken(\"<|reserved_special_token_42|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128051: AddedToken(\"<|reserved_special_token_43|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128052: AddedToken(\"<|reserved_special_token_44|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128053: AddedToken(\"<|reserved_special_token_45|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128054: AddedToken(\"<|reserved_special_token_46|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128055: AddedToken(\"<|reserved_special_token_47|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128056: AddedToken(\"<|reserved_special_token_48|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128057: AddedToken(\"<|reserved_special_token_49|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128058: AddedToken(\"<|reserved_special_token_50|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128059: AddedToken(\"<|reserved_special_token_51|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128060: AddedToken(\"<|reserved_special_token_52|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128061: AddedToken(\"<|reserved_special_token_53|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128062: AddedToken(\"<|reserved_special_token_54|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128063: AddedToken(\"<|reserved_special_token_55|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128064: AddedToken(\"<|reserved_special_token_56|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128065: AddedToken(\"<|reserved_special_token_57|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128066: AddedToken(\"<|reserved_special_token_58|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128067: AddedToken(\"<|reserved_special_token_59|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128068: AddedToken(\"<|reserved_special_token_60|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128069: AddedToken(\"<|reserved_special_token_61|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128070: AddedToken(\"<|reserved_special_token_62|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128071: AddedToken(\"<|reserved_special_token_63|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128072: AddedToken(\"<|reserved_special_token_64|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128073: AddedToken(\"<|reserved_special_token_65|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128074: AddedToken(\"<|reserved_special_token_66|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128075: AddedToken(\"<|reserved_special_token_67|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128076: AddedToken(\"<|reserved_special_token_68|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128077: AddedToken(\"<|reserved_special_token_69|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128078: AddedToken(\"<|reserved_special_token_70|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128079: AddedToken(\"<|reserved_special_token_71|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128080: AddedToken(\"<|reserved_special_token_72|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128081: AddedToken(\"<|reserved_special_token_73|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128082: AddedToken(\"<|reserved_special_token_74|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128083: AddedToken(\"<|reserved_special_token_75|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128084: AddedToken(\"<|reserved_special_token_76|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128085: AddedToken(\"<|reserved_special_token_77|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128086: AddedToken(\"<|reserved_special_token_78|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128087: AddedToken(\"<|reserved_special_token_79|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128088: AddedToken(\"<|reserved_special_token_80|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128089: AddedToken(\"<|reserved_special_token_81|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128090: AddedToken(\"<|reserved_special_token_82|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128091: AddedToken(\"<|reserved_special_token_83|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128092: AddedToken(\"<|reserved_special_token_84|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128093: AddedToken(\"<|reserved_special_token_85|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128094: AddedToken(\"<|reserved_special_token_86|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128095: AddedToken(\"<|reserved_special_token_87|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128096: AddedToken(\"<|reserved_special_token_88|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128097: AddedToken(\"<|reserved_special_token_89|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128098: AddedToken(\"<|reserved_special_token_90|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128099: AddedToken(\"<|reserved_special_token_91|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128100: AddedToken(\"<|reserved_special_token_92|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128101: AddedToken(\"<|reserved_special_token_93|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128102: AddedToken(\"<|reserved_special_token_94|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128103: AddedToken(\"<|reserved_special_token_95|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128104: AddedToken(\"<|reserved_special_token_96|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128105: AddedToken(\"<|reserved_special_token_97|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128106: AddedToken(\"<|reserved_special_token_98|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128107: AddedToken(\"<|reserved_special_token_99|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128108: AddedToken(\"<|reserved_special_token_100|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128109: AddedToken(\"<|reserved_special_token_101|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128110: AddedToken(\"<|reserved_special_token_102|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128111: AddedToken(\"<|reserved_special_token_103|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128112: AddedToken(\"<|reserved_special_token_104|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128113: AddedToken(\"<|reserved_special_token_105|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128114: AddedToken(\"<|reserved_special_token_106|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128115: AddedToken(\"<|reserved_special_token_107|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128116: AddedToken(\"<|reserved_special_token_108|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128117: AddedToken(\"<|reserved_special_token_109|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128118: AddedToken(\"<|reserved_special_token_110|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128119: AddedToken(\"<|reserved_special_token_111|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128120: AddedToken(\"<|reserved_special_token_112|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128121: AddedToken(\"<|reserved_special_token_113|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128122: AddedToken(\"<|reserved_special_token_114|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128123: AddedToken(\"<|reserved_special_token_115|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128124: AddedToken(\"<|reserved_special_token_116|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128125: AddedToken(\"<|reserved_special_token_117|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128126: AddedToken(\"<|reserved_special_token_118|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128127: AddedToken(\"<|reserved_special_token_119|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128128: AddedToken(\"<|reserved_special_token_120|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128129: AddedToken(\"<|reserved_special_token_121|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128130: AddedToken(\"<|reserved_special_token_122|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128131: AddedToken(\"<|reserved_special_token_123|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128132: AddedToken(\"<|reserved_special_token_124|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128133: AddedToken(\"<|reserved_special_token_125|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128134: AddedToken(\"<|reserved_special_token_126|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128135: AddedToken(\"<|reserved_special_token_127|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128136: AddedToken(\"<|reserved_special_token_128|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128137: AddedToken(\"<|reserved_special_token_129|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128138: AddedToken(\"<|reserved_special_token_130|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128139: AddedToken(\"<|reserved_special_token_131|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128140: AddedToken(\"<|reserved_special_token_132|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128141: AddedToken(\"<|reserved_special_token_133|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128142: AddedToken(\"<|reserved_special_token_134|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128143: AddedToken(\"<|reserved_special_token_135|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128144: AddedToken(\"<|reserved_special_token_136|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128145: AddedToken(\"<|reserved_special_token_137|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128146: AddedToken(\"<|reserved_special_token_138|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128147: AddedToken(\"<|reserved_special_token_139|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128148: AddedToken(\"<|reserved_special_token_140|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128149: AddedToken(\"<|reserved_special_token_141|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128150: AddedToken(\"<|reserved_special_token_142|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128151: AddedToken(\"<|reserved_special_token_143|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128152: AddedToken(\"<|reserved_special_token_144|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128153: AddedToken(\"<|reserved_special_token_145|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128154: AddedToken(\"<|reserved_special_token_146|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128155: AddedToken(\"<|reserved_special_token_147|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128156: AddedToken(\"<|reserved_special_token_148|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128157: AddedToken(\"<|reserved_special_token_149|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128158: AddedToken(\"<|reserved_special_token_150|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128159: AddedToken(\"<|reserved_special_token_151|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128160: AddedToken(\"<|reserved_special_token_152|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128161: AddedToken(\"<|reserved_special_token_153|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128162: AddedToken(\"<|reserved_special_token_154|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128163: AddedToken(\"<|reserved_special_token_155|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128164: AddedToken(\"<|reserved_special_token_156|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128165: AddedToken(\"<|reserved_special_token_157|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128166: AddedToken(\"<|reserved_special_token_158|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128167: AddedToken(\"<|reserved_special_token_159|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128168: AddedToken(\"<|reserved_special_token_160|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128169: AddedToken(\"<|reserved_special_token_161|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128170: AddedToken(\"<|reserved_special_token_162|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128171: AddedToken(\"<|reserved_special_token_163|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128172: AddedToken(\"<|reserved_special_token_164|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128173: AddedToken(\"<|reserved_special_token_165|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+       "\t128174: AddedToken(\"<|reserved_special_token_166|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
+ "\t128175: AddedToken(\"<|reserved_special_token_167|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
250
+ "\t128176: AddedToken(\"<|reserved_special_token_168|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
251
+ "\t128177: AddedToken(\"<|reserved_special_token_169|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
252
+ "\t128178: AddedToken(\"<|reserved_special_token_170|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
253
+ "\t128179: AddedToken(\"<|reserved_special_token_171|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
254
+ "\t128180: AddedToken(\"<|reserved_special_token_172|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
255
+ "\t128181: AddedToken(\"<|reserved_special_token_173|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
256
+ "\t128182: AddedToken(\"<|reserved_special_token_174|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
257
+ "\t128183: AddedToken(\"<|reserved_special_token_175|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
258
+ "\t128184: AddedToken(\"<|reserved_special_token_176|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
259
+ "\t128185: AddedToken(\"<|reserved_special_token_177|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
260
+ "\t128186: AddedToken(\"<|reserved_special_token_178|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
261
+ "\t128187: AddedToken(\"<|reserved_special_token_179|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
262
+ "\t128188: AddedToken(\"<|reserved_special_token_180|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
263
+ "\t128189: AddedToken(\"<|reserved_special_token_181|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
264
+ "\t128190: AddedToken(\"<|reserved_special_token_182|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
265
+ "\t128191: AddedToken(\"<|reserved_special_token_183|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
266
+ "\t128192: AddedToken(\"<|reserved_special_token_184|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
267
+ "\t128193: AddedToken(\"<|reserved_special_token_185|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
268
+ "\t128194: AddedToken(\"<|reserved_special_token_186|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
269
+ "\t128195: AddedToken(\"<|reserved_special_token_187|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
270
+ "\t128196: AddedToken(\"<|reserved_special_token_188|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
271
+ "\t128197: AddedToken(\"<|reserved_special_token_189|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
272
+ "\t128198: AddedToken(\"<|reserved_special_token_190|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
273
+ "\t128199: AddedToken(\"<|reserved_special_token_191|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
274
+ "\t128200: AddedToken(\"<|reserved_special_token_192|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
275
+ "\t128201: AddedToken(\"<|reserved_special_token_193|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
276
+ "\t128202: AddedToken(\"<|reserved_special_token_194|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
277
+ "\t128203: AddedToken(\"<|reserved_special_token_195|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
278
+ "\t128204: AddedToken(\"<|reserved_special_token_196|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
279
+ "\t128205: AddedToken(\"<|reserved_special_token_197|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
280
+ "\t128206: AddedToken(\"<|reserved_special_token_198|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
281
+ "\t128207: AddedToken(\"<|reserved_special_token_199|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
282
+ "\t128208: AddedToken(\"<|reserved_special_token_200|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
283
+ "\t128209: AddedToken(\"<|reserved_special_token_201|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
284
+ "\t128210: AddedToken(\"<|reserved_special_token_202|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
285
+ "\t128211: AddedToken(\"<|reserved_special_token_203|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
286
+ "\t128212: AddedToken(\"<|reserved_special_token_204|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
287
+ "\t128213: AddedToken(\"<|reserved_special_token_205|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
288
+ "\t128214: AddedToken(\"<|reserved_special_token_206|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
289
+ "\t128215: AddedToken(\"<|reserved_special_token_207|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
290
+ "\t128216: AddedToken(\"<|reserved_special_token_208|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
291
+ "\t128217: AddedToken(\"<|reserved_special_token_209|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
292
+ "\t128218: AddedToken(\"<|reserved_special_token_210|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
293
+ "\t128219: AddedToken(\"<|reserved_special_token_211|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
294
+ "\t128220: AddedToken(\"<|reserved_special_token_212|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
295
+ "\t128221: AddedToken(\"<|reserved_special_token_213|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
296
+ "\t128222: AddedToken(\"<|reserved_special_token_214|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
297
+ "\t128223: AddedToken(\"<|reserved_special_token_215|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
298
+ "\t128224: AddedToken(\"<|reserved_special_token_216|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
299
+ "\t128225: AddedToken(\"<|reserved_special_token_217|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
300
+ "\t128226: AddedToken(\"<|reserved_special_token_218|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
301
+ "\t128227: AddedToken(\"<|reserved_special_token_219|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
302
+ "\t128228: AddedToken(\"<|reserved_special_token_220|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
303
+ "\t128229: AddedToken(\"<|reserved_special_token_221|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
304
+ "\t128230: AddedToken(\"<|reserved_special_token_222|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
305
+ "\t128231: AddedToken(\"<|reserved_special_token_223|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
306
+ "\t128232: AddedToken(\"<|reserved_special_token_224|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
307
+ "\t128233: AddedToken(\"<|reserved_special_token_225|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
308
+ "\t128234: AddedToken(\"<|reserved_special_token_226|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
309
+ "\t128235: AddedToken(\"<|reserved_special_token_227|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
310
+ "\t128236: AddedToken(\"<|reserved_special_token_228|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
311
+ "\t128237: AddedToken(\"<|reserved_special_token_229|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
312
+ "\t128238: AddedToken(\"<|reserved_special_token_230|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
313
+ "\t128239: AddedToken(\"<|reserved_special_token_231|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
314
+ "\t128240: AddedToken(\"<|reserved_special_token_232|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
315
+ "\t128241: AddedToken(\"<|reserved_special_token_233|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
316
+ "\t128242: AddedToken(\"<|reserved_special_token_234|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
317
+ "\t128243: AddedToken(\"<|reserved_special_token_235|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
318
+ "\t128244: AddedToken(\"<|reserved_special_token_236|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
319
+ "\t128245: AddedToken(\"<|reserved_special_token_237|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
320
+ "\t128246: AddedToken(\"<|reserved_special_token_238|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
321
+ "\t128247: AddedToken(\"<|reserved_special_token_239|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
322
+ "\t128248: AddedToken(\"<|reserved_special_token_240|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
323
+ "\t128249: AddedToken(\"<|reserved_special_token_241|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
324
+ "\t128250: AddedToken(\"<|reserved_special_token_242|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
325
+ "\t128251: AddedToken(\"<|reserved_special_token_243|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
326
+ "\t128252: AddedToken(\"<|reserved_special_token_244|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
327
+ "\t128253: AddedToken(\"<|reserved_special_token_245|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
328
+ "\t128254: AddedToken(\"<|reserved_special_token_246|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
329
+ "\t128255: AddedToken(\"<|reserved_special_token_247|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
330
+ "\t128256: AddedToken(\"<audio_soft_token>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
331
+ "\t128257: AddedToken(\"<start_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
332
+ "\t128258: AddedToken(\"<end_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
333
+ "}\n",
334
+ ")"
335
+ ]
336
+ },
337
+ "execution_count": 3,
338
+ "metadata": {},
339
+ "output_type": "execute_result"
340
+ }
341
+ ],
342
+ "source": [
343
+ "tokenizer"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": 7,
349
+ "metadata": {},
350
+ "outputs": [
351
+ {
352
+ "data": {
353
+ "text/plain": [
354
+ "3"
355
+ ]
356
+ },
357
+ "execution_count": 7,
358
+ "metadata": {},
359
+ "output_type": "execute_result"
360
+ }
361
+ ],
362
+ "source": [
363
+ "tokenizer.add_special_tokens({'additional_special_tokens':['<audio_soft_token>','<start_of_audio>','<end_of_audio>','']})"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": 8,
369
+ "metadata": {},
370
+ "outputs": [
371
+ {
372
+ "data": {
373
+ "text/plain": [
374
+ "PreTrainedTokenizerFast(name_or_path='/mnt/jeff/InCar/LLamaNemotronOmni/Llama-3.1-Nemotron-Nano-4B-v1.1', vocab_size=128000, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>', 'pad_token': '<|eot_id|>', 'additional_special_tokens': ['<audio_soft_token>', '<start_of_audio>', '<end_of_audio>', '']}, clean_up_tokenization_spaces=True, added_tokens_decoder={\n",
375
+ "\t128000: AddedToken(\"<|begin_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
376
+ "\t128001: AddedToken(\"<|end_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
377
+ "\t128002: AddedToken(\"<|reserved_special_token_0|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
378
+ "\t128003: AddedToken(\"<|reserved_special_token_1|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
379
+ "\t128004: AddedToken(\"<|finetune_right_pad_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
380
+ "\t128005: AddedToken(\"<|reserved_special_token_2|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
381
+ "\t128006: AddedToken(\"<|start_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
382
+ "\t128007: AddedToken(\"<|end_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
383
+ "\t128008: AddedToken(\"<|eom_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
384
+ "\t128009: AddedToken(\"<|eot_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
385
+ "\t128010: AddedToken(\"<|python_tag|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
386
+ "\t128011: AddedToken(\"<|reserved_special_token_3|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
387
+ "\t128012: AddedToken(\"<|reserved_special_token_4|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
388
+ "\t128013: AddedToken(\"<|reserved_special_token_5|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
389
+ "\t128014: AddedToken(\"<|reserved_special_token_6|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
390
+ "\t128015: AddedToken(\"<|reserved_special_token_7|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
391
+ "\t128016: AddedToken(\"<|reserved_special_token_8|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
392
+ "\t128017: AddedToken(\"<|reserved_special_token_9|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
393
+ "\t128018: AddedToken(\"<|reserved_special_token_10|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
394
+ "\t128019: AddedToken(\"<|reserved_special_token_11|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
395
+ "\t128020: AddedToken(\"<|reserved_special_token_12|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
396
+ "\t128021: AddedToken(\"<|reserved_special_token_13|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
397
+ "\t128022: AddedToken(\"<|reserved_special_token_14|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
398
+ "\t128023: AddedToken(\"<|reserved_special_token_15|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
399
+ "\t128024: AddedToken(\"<|reserved_special_token_16|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
400
+ "\t128025: AddedToken(\"<|reserved_special_token_17|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
401
+ "\t128026: AddedToken(\"<|reserved_special_token_18|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
402
+ "\t128027: AddedToken(\"<|reserved_special_token_19|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
403
+ "\t128028: AddedToken(\"<|reserved_special_token_20|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
404
+ "\t128029: AddedToken(\"<|reserved_special_token_21|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
405
+ "\t128030: AddedToken(\"<|reserved_special_token_22|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
406
+ "\t128031: AddedToken(\"<|reserved_special_token_23|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
407
+ "\t128032: AddedToken(\"<|reserved_special_token_24|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
408
+ "\t128033: AddedToken(\"<|reserved_special_token_25|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
409
+ "\t128034: AddedToken(\"<|reserved_special_token_26|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
410
+ "\t128035: AddedToken(\"<|reserved_special_token_27|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
411
+ "\t128036: AddedToken(\"<|reserved_special_token_28|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
412
+ "\t128037: AddedToken(\"<|reserved_special_token_29|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
413
+ "\t128038: AddedToken(\"<|reserved_special_token_30|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
414
+ "\t128039: AddedToken(\"<|reserved_special_token_31|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
415
+ "\t128040: AddedToken(\"<|reserved_special_token_32|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
416
+ "\t128041: AddedToken(\"<|reserved_special_token_33|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
417
+ "\t128042: AddedToken(\"<|reserved_special_token_34|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
418
+ "\t128043: AddedToken(\"<|reserved_special_token_35|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
419
+ "\t128044: AddedToken(\"<|reserved_special_token_36|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
420
+ "\t128045: AddedToken(\"<|reserved_special_token_37|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
421
+ "\t128046: AddedToken(\"<|reserved_special_token_38|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
422
+ "\t128047: AddedToken(\"<|reserved_special_token_39|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
423
+ "\t128048: AddedToken(\"<|reserved_special_token_40|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
424
+ "\t128049: AddedToken(\"<|reserved_special_token_41|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
425
+ "\t128050: AddedToken(\"<|reserved_special_token_42|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
426
+ "\t128051: AddedToken(\"<|reserved_special_token_43|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
427
+ "\t128052: AddedToken(\"<|reserved_special_token_44|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
428
+ "\t128053: AddedToken(\"<|reserved_special_token_45|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
429
+ "\t128054: AddedToken(\"<|reserved_special_token_46|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
430
+ "\t128055: AddedToken(\"<|reserved_special_token_47|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
431
+ "\t128056: AddedToken(\"<|reserved_special_token_48|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
432
+ "\t128057: AddedToken(\"<|reserved_special_token_49|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
433
+ "\t128058: AddedToken(\"<|reserved_special_token_50|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
434
+ "\t128059: AddedToken(\"<|reserved_special_token_51|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
435
+ "\t128060: AddedToken(\"<|reserved_special_token_52|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
436
+ "\t128061: AddedToken(\"<|reserved_special_token_53|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
437
+ "\t128062: AddedToken(\"<|reserved_special_token_54|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
438
+ "\t128063: AddedToken(\"<|reserved_special_token_55|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
439
+ "\t128064: AddedToken(\"<|reserved_special_token_56|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
440
+ "\t128065: AddedToken(\"<|reserved_special_token_57|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
441
+ "\t128066: AddedToken(\"<|reserved_special_token_58|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
442
+ "\t128067: AddedToken(\"<|reserved_special_token_59|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
443
+ "\t128068: AddedToken(\"<|reserved_special_token_60|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
444
+ "\t128069: AddedToken(\"<|reserved_special_token_61|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
445
+ "\t128070: AddedToken(\"<|reserved_special_token_62|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
446
+ "\t128071: AddedToken(\"<|reserved_special_token_63|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
447
+ "\t128072: AddedToken(\"<|reserved_special_token_64|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
448
+ "\t128073: AddedToken(\"<|reserved_special_token_65|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
449
+ "\t128074: AddedToken(\"<|reserved_special_token_66|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
450
+ "\t128075: AddedToken(\"<|reserved_special_token_67|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
451
+ "\t128076: AddedToken(\"<|reserved_special_token_68|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
452
+ "\t128077: AddedToken(\"<|reserved_special_token_69|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
453
+ "\t128078: AddedToken(\"<|reserved_special_token_70|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
454
+ "\t128079: AddedToken(\"<|reserved_special_token_71|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
455
+ "\t128080: AddedToken(\"<|reserved_special_token_72|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
456
+ "\t128081: AddedToken(\"<|reserved_special_token_73|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
457
+ "\t128082: AddedToken(\"<|reserved_special_token_74|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
458
+ "\t128083: AddedToken(\"<|reserved_special_token_75|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
459
+ "\t128084: AddedToken(\"<|reserved_special_token_76|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
460
+ "\t128085: AddedToken(\"<|reserved_special_token_77|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
461
+ "\t128086: AddedToken(\"<|reserved_special_token_78|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
462
+ "\t128087: AddedToken(\"<|reserved_special_token_79|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
463
+ "\t128088: AddedToken(\"<|reserved_special_token_80|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
464
+ "\t128089: AddedToken(\"<|reserved_special_token_81|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
465
+ "\t128090: AddedToken(\"<|reserved_special_token_82|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
466
+ "\t128091: AddedToken(\"<|reserved_special_token_83|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
467
+ "\t128092: AddedToken(\"<|reserved_special_token_84|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
468
+ "\t128093: AddedToken(\"<|reserved_special_token_85|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
469
+ "\t128094: AddedToken(\"<|reserved_special_token_86|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
470
+ "\t128095: AddedToken(\"<|reserved_special_token_87|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
471
+ "\t128096: AddedToken(\"<|reserved_special_token_88|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
472
+ "\t128097: AddedToken(\"<|reserved_special_token_89|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
473
+ "\t128098: AddedToken(\"<|reserved_special_token_90|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
474
+ "\t128099: AddedToken(\"<|reserved_special_token_91|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
475
+ "\t128100: AddedToken(\"<|reserved_special_token_92|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
476
+ "\t128101: AddedToken(\"<|reserved_special_token_93|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
477
+ "\t128102: AddedToken(\"<|reserved_special_token_94|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
478
+ "\t128103: AddedToken(\"<|reserved_special_token_95|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
479
+ "\t128104: AddedToken(\"<|reserved_special_token_96|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
480
+ "\t128105: AddedToken(\"<|reserved_special_token_97|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
481
+ "\t128106: AddedToken(\"<|reserved_special_token_98|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
482
+ "\t128107: AddedToken(\"<|reserved_special_token_99|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
483
+ "\t128108: AddedToken(\"<|reserved_special_token_100|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
484
+ "\t128109: AddedToken(\"<|reserved_special_token_101|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
485
+ "\t128110: AddedToken(\"<|reserved_special_token_102|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
486
+ "\t128111: AddedToken(\"<|reserved_special_token_103|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
487
+ "\t128112: AddedToken(\"<|reserved_special_token_104|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
488
+ "\t128113: AddedToken(\"<|reserved_special_token_105|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
489
+ "\t128114: AddedToken(\"<|reserved_special_token_106|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
490
+ "\t128115: AddedToken(\"<|reserved_special_token_107|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
491
+ "\t128116: AddedToken(\"<|reserved_special_token_108|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
492
+ "\t128117: AddedToken(\"<|reserved_special_token_109|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
493
+ "\t128118: AddedToken(\"<|reserved_special_token_110|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
494
+ "\t128119: AddedToken(\"<|reserved_special_token_111|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
495
+ "\t128120: AddedToken(\"<|reserved_special_token_112|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
496
+ "\t128121: AddedToken(\"<|reserved_special_token_113|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
497
+ "\t128122: AddedToken(\"<|reserved_special_token_114|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
498
+ "\t128123: AddedToken(\"<|reserved_special_token_115|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
499
+ "\t128124: AddedToken(\"<|reserved_special_token_116|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
500
+ "\t128125: AddedToken(\"<|reserved_special_token_117|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
501
+ "\t128126: AddedToken(\"<|reserved_special_token_118|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
502
+ "\t128127: AddedToken(\"<|reserved_special_token_119|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
503
+ "\t128128: AddedToken(\"<|reserved_special_token_120|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
504
+ "\t128129: AddedToken(\"<|reserved_special_token_121|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
505
+ "\t128130: AddedToken(\"<|reserved_special_token_122|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
506
+ "\t128131: AddedToken(\"<|reserved_special_token_123|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
507
+ "\t128132: AddedToken(\"<|reserved_special_token_124|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
508
+ "\t128133: AddedToken(\"<|reserved_special_token_125|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
509
+ "\t128134: AddedToken(\"<|reserved_special_token_126|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
510
+ "\t128135: AddedToken(\"<|reserved_special_token_127|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
511
+ "\t128136: AddedToken(\"<|reserved_special_token_128|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
512
+ "\t128137: AddedToken(\"<|reserved_special_token_129|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
513
+ "\t128138: AddedToken(\"<|reserved_special_token_130|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
514
+ "\t128139: AddedToken(\"<|reserved_special_token_131|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
515
+ "\t128140: AddedToken(\"<|reserved_special_token_132|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
516
+ "\t128141: AddedToken(\"<|reserved_special_token_133|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
517
+ "\t128142: AddedToken(\"<|reserved_special_token_134|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
518
+ "\t128143: AddedToken(\"<|reserved_special_token_135|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
519
+ "\t128144: AddedToken(\"<|reserved_special_token_136|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
520
+ "\t128145: AddedToken(\"<|reserved_special_token_137|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
521
+ "\t128146: AddedToken(\"<|reserved_special_token_138|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
522
+ "\t128147: AddedToken(\"<|reserved_special_token_139|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
523
+ "\t128148: AddedToken(\"<|reserved_special_token_140|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
524
+ "\t128149: AddedToken(\"<|reserved_special_token_141|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
525
+ "\t128150: AddedToken(\"<|reserved_special_token_142|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
526
+ "\t128151: AddedToken(\"<|reserved_special_token_143|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
527
+ "\t128152: AddedToken(\"<|reserved_special_token_144|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
528
+ "\t128153: AddedToken(\"<|reserved_special_token_145|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
529
+ "\t128154: AddedToken(\"<|reserved_special_token_146|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
530
+ "\t128155: AddedToken(\"<|reserved_special_token_147|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
531
+ "\t128156: AddedToken(\"<|reserved_special_token_148|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
532
+ "\t128157: AddedToken(\"<|reserved_special_token_149|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
533
+ "\t128158: AddedToken(\"<|reserved_special_token_150|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
534
+ "\t128159: AddedToken(\"<|reserved_special_token_151|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
535
+ "\t128160: AddedToken(\"<|reserved_special_token_152|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
536
+ "\t128161: AddedToken(\"<|reserved_special_token_153|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
537
+ "\t128162: AddedToken(\"<|reserved_special_token_154|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
538
+ "\t128163: AddedToken(\"<|reserved_special_token_155|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
539
+ "\t128164: AddedToken(\"<|reserved_special_token_156|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
540
+ "\t128165: AddedToken(\"<|reserved_special_token_157|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
541
+ "\t128166: AddedToken(\"<|reserved_special_token_158|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
542
+ "\t128167: AddedToken(\"<|reserved_special_token_159|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
543
+ "\t128168: AddedToken(\"<|reserved_special_token_160|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
544
+ "\t128169: AddedToken(\"<|reserved_special_token_161|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
545
+ "\t128170: AddedToken(\"<|reserved_special_token_162|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
546
+ "\t128171: AddedToken(\"<|reserved_special_token_163|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
547
+ "\t128172: AddedToken(\"<|reserved_special_token_164|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
548
+ "\t128173: AddedToken(\"<|reserved_special_token_165|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
549
+ "\t128174: AddedToken(\"<|reserved_special_token_166|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
550
+ "\t128175: AddedToken(\"<|reserved_special_token_167|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
551
+ "\t128176: AddedToken(\"<|reserved_special_token_168|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
552
+ "\t128177: AddedToken(\"<|reserved_special_token_169|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
553
+ "\t128178: AddedToken(\"<|reserved_special_token_170|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
554
+ "\t128179: AddedToken(\"<|reserved_special_token_171|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
555
+ "\t128180: AddedToken(\"<|reserved_special_token_172|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
556
+ "\t128181: AddedToken(\"<|reserved_special_token_173|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
557
+ "\t128182: AddedToken(\"<|reserved_special_token_174|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
558
+ "\t128183: AddedToken(\"<|reserved_special_token_175|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
559
+ "\t128184: AddedToken(\"<|reserved_special_token_176|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
560
+ "\t128185: AddedToken(\"<|reserved_special_token_177|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
561
+ "\t128186: AddedToken(\"<|reserved_special_token_178|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
562
+ "\t128187: AddedToken(\"<|reserved_special_token_179|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
563
+ "\t128188: AddedToken(\"<|reserved_special_token_180|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
564
+ "\t128189: AddedToken(\"<|reserved_special_token_181|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
565
+ "\t128190: AddedToken(\"<|reserved_special_token_182|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
566
+ "\t128191: AddedToken(\"<|reserved_special_token_183|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
567
+ "\t128192: AddedToken(\"<|reserved_special_token_184|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
568
+ "\t128193: AddedToken(\"<|reserved_special_token_185|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
569
+ "\t128194: AddedToken(\"<|reserved_special_token_186|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
570
+ "\t128195: AddedToken(\"<|reserved_special_token_187|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
571
+ "\t128196: AddedToken(\"<|reserved_special_token_188|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
572
+ "\t128197: AddedToken(\"<|reserved_special_token_189|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
573
+ "\t128198: AddedToken(\"<|reserved_special_token_190|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
574
+ "\t128199: AddedToken(\"<|reserved_special_token_191|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
575
+ "\t128200: AddedToken(\"<|reserved_special_token_192|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
576
+ "\t128201: AddedToken(\"<|reserved_special_token_193|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
577
+ "\t128202: AddedToken(\"<|reserved_special_token_194|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
578
+ "\t128203: AddedToken(\"<|reserved_special_token_195|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
579
+ "\t128204: AddedToken(\"<|reserved_special_token_196|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
580
+ "\t128205: AddedToken(\"<|reserved_special_token_197|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
581
+ "\t128206: AddedToken(\"<|reserved_special_token_198|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
582
+ "\t128207: AddedToken(\"<|reserved_special_token_199|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
583
+ "\t128208: AddedToken(\"<|reserved_special_token_200|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
584
+ "\t128209: AddedToken(\"<|reserved_special_token_201|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
585
+ "\t128210: AddedToken(\"<|reserved_special_token_202|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
586
+ "\t128211: AddedToken(\"<|reserved_special_token_203|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
587
+ "\t128212: AddedToken(\"<|reserved_special_token_204|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
588
+ "\t128213: AddedToken(\"<|reserved_special_token_205|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
589
+ "\t128214: AddedToken(\"<|reserved_special_token_206|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
590
+ "\t128215: AddedToken(\"<|reserved_special_token_207|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
591
+ "\t128216: AddedToken(\"<|reserved_special_token_208|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
592
+ "\t128217: AddedToken(\"<|reserved_special_token_209|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
593
+ "\t128218: AddedToken(\"<|reserved_special_token_210|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
594
+ "\t128219: AddedToken(\"<|reserved_special_token_211|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
595
+ "\t128220: AddedToken(\"<|reserved_special_token_212|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
596
+ "\t128221: AddedToken(\"<|reserved_special_token_213|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
597
+ "\t128222: AddedToken(\"<|reserved_special_token_214|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
598
+ "\t128223: AddedToken(\"<|reserved_special_token_215|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
599
+ "\t128224: AddedToken(\"<|reserved_special_token_216|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
600
+ "\t128225: AddedToken(\"<|reserved_special_token_217|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
601
+ "\t128226: AddedToken(\"<|reserved_special_token_218|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
602
+ "\t128227: AddedToken(\"<|reserved_special_token_219|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
603
+ "\t128228: AddedToken(\"<|reserved_special_token_220|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
604
+ "\t128229: AddedToken(\"<|reserved_special_token_221|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
605
+ "\t128230: AddedToken(\"<|reserved_special_token_222|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
606
+ "\t128231: AddedToken(\"<|reserved_special_token_223|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
607
+ "\t128232: AddedToken(\"<|reserved_special_token_224|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
608
+ "\t128233: AddedToken(\"<|reserved_special_token_225|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
609
+ "\t128234: AddedToken(\"<|reserved_special_token_226|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
610
+ "\t128235: AddedToken(\"<|reserved_special_token_227|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
611
+ "\t128236: AddedToken(\"<|reserved_special_token_228|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
612
+ "\t128237: AddedToken(\"<|reserved_special_token_229|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
613
+ "\t128238: AddedToken(\"<|reserved_special_token_230|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
614
+ "\t128239: AddedToken(\"<|reserved_special_token_231|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
615
+ "\t128240: AddedToken(\"<|reserved_special_token_232|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
616
+ "\t128241: AddedToken(\"<|reserved_special_token_233|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
617
+ "\t128242: AddedToken(\"<|reserved_special_token_234|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
618
+ "\t128243: AddedToken(\"<|reserved_special_token_235|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
619
+ "\t128244: AddedToken(\"<|reserved_special_token_236|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
620
+ "\t128245: AddedToken(\"<|reserved_special_token_237|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
621
+ "\t128246: AddedToken(\"<|reserved_special_token_238|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
622
+ "\t128247: AddedToken(\"<|reserved_special_token_239|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
623
+ "\t128248: AddedToken(\"<|reserved_special_token_240|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
624
+ "\t128249: AddedToken(\"<|reserved_special_token_241|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
625
+ "\t128250: AddedToken(\"<|reserved_special_token_242|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
626
+ "\t128251: AddedToken(\"<|reserved_special_token_243|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
627
+ "\t128252: AddedToken(\"<|reserved_special_token_244|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
628
+ "\t128253: AddedToken(\"<|reserved_special_token_245|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
629
+ "\t128254: AddedToken(\"<|reserved_special_token_246|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
630
+ "\t128255: AddedToken(\"<|reserved_special_token_247|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
631
+ "\t128256: AddedToken(\"<audio_soft_token>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
632
+ "\t128257: AddedToken(\"<start_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
633
+ "\t128258: AddedToken(\"<end_of_audio>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
634
+ "}\n",
635
+ ")"
636
+ ]
637
+ },
638
+ "execution_count": 8,
639
+ "metadata": {},
640
+ "output_type": "execute_result"
641
+ }
642
+ ],
643
+ "source": [
644
+ "tokenizer"
645
+ ]
646
+ },
647
+ {
648
+ "cell_type": "code",
649
+ "execution_count": 6,
650
+ "metadata": {},
651
+ "outputs": [],
652
+ "source": [
653
+ "tokenizer.audio_token = \"<audio_soft_token>\"\n",
654
+ "tokenizer.boa_token = \"<start_of_audio>\"\n",
655
+ "tokenizer.eoa_token = \"<end_of_audio>\"\n",
656
+ "tokenizer.audio_token_id = 128256"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "code",
661
+ "execution_count": 7,
662
+ "metadata": {},
663
+ "outputs": [
664
+ {
665
+ "data": {
666
+ "text/plain": [
667
+ "('../tokenizer/tokenizer_config.json',\n",
668
+ " '../tokenizer/special_tokens_map.json',\n",
669
+ " '../tokenizer/tokenizer.json')"
670
+ ]
671
+ },
672
+ "execution_count": 7,
673
+ "metadata": {},
674
+ "output_type": "execute_result"
675
+ }
676
+ ],
677
+ "source": [
678
+ "tokenizer.save_pretrained('../tokenizer')"
679
+ ]
680
+ },
681
+ {
682
+ "cell_type": "code",
683
+ "execution_count": 1,
684
+ "metadata": {},
685
+ "outputs": [
686
+ {
687
+ "name": "stderr",
688
+ "output_type": "stream",
689
+ "text": [
690
+ "You are using a model of type NemotronOmni to instantiate a model of type nemotronOmni. This is not supported for all configurations of models and can yield errors.\n"
691
+ ]
692
+ }
693
+ ],
694
+ "source": [
695
+ "from transformers import AutoConfig\n",
696
+ "config = AutoConfig.from_pretrained('/mnt/jeff/InCar/LlamaNemotronOmni/Llama-3.1-NemotronOmni')"
697
+ ]
698
+ },
699
+ {
700
+ "cell_type": "code",
701
+ "execution_count": 7,
702
+ "metadata": {},
703
+ "outputs": [
704
+ {
705
+ "data": {
706
+ "text/plain": [
707
+ "'llama'"
708
+ ]
709
+ },
710
+ "execution_count": 7,
711
+ "metadata": {},
712
+ "output_type": "execute_result"
713
+ }
714
+ ],
715
+ "source": [
716
+ "config.model_type"
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": null,
722
+ "metadata": {},
723
+ "outputs": [],
724
+ "source": []
725
+ }
726
+ ],
727
+ "metadata": {
728
+ "kernelspec": {
729
+ "display_name": "base",
730
+ "language": "python",
731
+ "name": "python3"
732
+ },
733
+ "language_info": {
734
+ "codemirror_mode": {
735
+ "name": "ipython",
736
+ "version": 3
737
+ },
738
+ "file_extension": ".py",
739
+ "mimetype": "text/x-python",
740
+ "name": "python",
741
+ "nbconvert_exporter": "python",
742
+ "pygments_lexer": "ipython3",
743
+ "version": "3.12.7"
744
+ }
745
+ },
746
+ "nbformat": 4,
747
+ "nbformat_minor": 2
748
+ }
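Note on the tokenizer cells above: they register three audio-specific special tokens on top of the Llama-3.1 vocabulary (<audio_soft_token> at id 128256, <start_of_audio> at 128257, <end_of_audio> at 128258) and write the result to ../tokenizer. As a minimal sketch (not part of the committed notebooks; the path and attribute names simply follow the cells above), the saved tokenizer could be reloaded and the new ids checked like this:

from transformers import AutoTokenizer

# Reload the tokenizer written by tokenizer.save_pretrained('../tokenizer')
tokenizer = AutoTokenizer.from_pretrained('../tokenizer')

# The three audio tokens added above should resolve to ids 128256-128258
for tok in ('<audio_soft_token>', '<start_of_audio>', '<end_of_audio>'):
    print(tok, tokenizer.convert_tokens_to_ids(tok))

# Ad-hoc attributes such as audio_token / audio_token_id are not persisted by
# save_pretrained, so (assumption) they would be reassigned after loading:
tokenizer.audio_token = '<audio_soft_token>'
tokenizer.boa_token = '<start_of_audio>'
tokenizer.eoa_token = '<end_of_audio>'
tokenizer.audio_token_id = 128256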
build_model.ipynb ADDED
@@ -0,0 +1,525 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "[2025-06-18 08:09:49,587] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
13
+ ]
14
+ },
15
+ {
16
+ "name": "stderr",
17
+ "output_type": "stream",
18
+ "text": [
19
+ "/mnt/jeff/anaconda/compiler_compat/ld: cannot find -laio: No such file or directory\n",
20
+ "collect2: error: ld returned 1 exit status\n",
21
+ "/mnt/jeff/anaconda/compiler_compat/ld: warning: librt.so.1, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
22
+ "/mnt/jeff/anaconda/compiler_compat/ld: warning: libpthread.so.0, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
23
+ "/mnt/jeff/anaconda/compiler_compat/ld: warning: libstdc++.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
24
+ "/mnt/jeff/anaconda/compiler_compat/ld: warning: libm.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
25
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'\n",
26
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'\n",
27
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'\n",
28
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'\n",
29
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'\n",
30
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for bool@CXXABI_1.3'\n",
31
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_logic_error(char const*)@GLIBCXX_3.4'\n",
32
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
33
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::logic_error@GLIBCXX_3.4'\n",
34
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::~locale()@GLIBCXX_3.4'\n",
35
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::string const&, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
36
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_end_catch@CXXABI_1.3'\n",
37
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ofstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
38
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::logic_error::~logic_error()@GLIBCXX_3.4'\n",
39
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__si_class_type_info@CXXABI_1.3'\n",
40
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n",
41
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
42
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new[](unsigned long)@GLIBCXX_3.4'\n",
43
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_leak_hard()@GLIBCXX_3.4'\n",
44
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ifstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
45
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >::basic_streambuf(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> > const&)@GLIBCXX_3.4'\n",
46
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*, unsigned long)@GLIBCXX_3.4'\n",
47
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::string const&)@GLIBCXX_3.4'\n",
48
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned short@CXXABI_1.3'\n",
49
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::resize(unsigned long, char)@GLIBCXX_3.4'\n",
50
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char const*@CXXABI_1.3'\n",
51
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ctype<char>::_M_widen_init() const@GLIBCXX_3.4.11'\n",
52
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_invalid_argument(char const*)@GLIBCXX_3.4'\n",
53
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::operator=(std::locale const&)@GLIBCXX_3.4'\n",
54
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<wchar_t, std::char_traits<wchar_t> >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n",
55
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base const*)@GLIBCXX_3.4'\n",
56
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_free_exception@CXXABI_1.3'\n",
57
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::notify_one()@GLIBCXX_3.4.11'\n",
58
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::~Init()@GLIBCXX_3.4'\n",
59
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string()@GLIBCXX_3.4'\n",
60
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_pure_virtual@CXXABI_1.3'\n",
61
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::flush()@GLIBCXX_3.4'\n",
62
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__class_type_info@CXXABI_1.3'\n",
63
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_rethrow@CXXABI_1.3'\n",
64
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
65
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_fstream<char, std::char_traits<char> >::~basic_fstream()@GLIBCXX_3.4'\n",
66
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::compare(char const*) const@GLIBCXX_3.4'\n",
67
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
68
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale()@GLIBCXX_3.4'\n",
69
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::system_clock::now()@GLIBCXX_3.4.19'\n",
70
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ifstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
71
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Hash_bytes(void const*, unsigned long, unsigned long)@CXXABI_1.3.5'\n",
72
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<long long>(long long)@GLIBCXX_3.4.9'\n",
73
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char*@CXXABI_1.3'\n",
74
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_Prime_rehash_policy::_M_need_rehash(unsigned long, unsigned long, unsigned long) const@GLIBCXX_3.4.18'\n",
75
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::out_of_range@GLIBCXX_3.4'\n",
76
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<unsigned long>(unsigned long)@GLIBCXX_3.4.9'\n",
77
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base const*)@GLIBCXX_3.4'\n",
78
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::~ios_base()@GLIBCXX_3.4'\n",
79
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::range_error::~range_error()@GLIBCXX_3.4'\n",
80
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file<char>::~__basic_file()@GLIBCXX_3.4'\n",
81
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_acquire@CXXABI_1.3'\n",
82
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<bool>(bool)@GLIBCXX_3.4.9'\n",
83
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::overflow_error@GLIBCXX_3.4'\n",
84
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_fstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
85
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::range_error@GLIBCXX_3.4'\n",
86
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
87
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_filebuf<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
88
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete[](void*)@GLIBCXX_3.4'\n",
89
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
90
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(unsigned long, char, std::allocator<char> const&)@GLIBCXX_3.4'\n",
91
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_transfer(std::__detail::_List_node_base*, std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n",
92
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::replace(unsigned long, unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n",
93
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::exception@GLIBCXX_3.4'\n",
94
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_Rep::_M_destroy(std::allocator<wchar_t> const&)@GLIBCXX_3.4'\n",
95
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream& std::istream::_M_extract<double>(double&)@GLIBCXX_3.4.9'\n",
96
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
97
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_fstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
98
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::basic_ifstream(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n",
99
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(std::string const&)@GLIBCXX_3.4'\n",
100
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new(unsigned long)@GLIBCXX_3.4'\n",
101
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
102
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned int@CXXABI_1.3'\n",
103
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*)@GLIBCXX_3.4'\n",
104
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::domain_error@GLIBCXX_3.4'\n",
105
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char, unsigned long) const@GLIBCXX_3.4'\n",
106
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::put(char)@GLIBCXX_3.4'\n",
107
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for int@CXXABI_1.3'\n",
108
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_alloc()@GLIBCXX_3.4'\n",
109
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_thread_atexit@CXXABI_1.3.7'\n",
110
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned int*@CXXABI_1.3'\n",
111
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n",
112
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::~basic_ifstream()@GLIBCXX_3.4'\n",
113
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::Init()@GLIBCXX_3.4'\n",
114
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::condition_variable()@GLIBCXX_3.4.11'\n",
115
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::basic_filebuf()@GLIBCXX_3.4'\n",
116
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
117
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::domain_error::~domain_error()@GLIBCXX_3.4'\n",
118
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cerr@GLIBCXX_3.4'\n",
119
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char const*, unsigned long, unsigned long) const@GLIBCXX_3.4'\n",
120
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
121
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::allocator<char> const&)@GLIBCXX_3.4'\n",
122
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::str() const@GLIBCXX_3.4'\n",
123
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::invalid_argument@GLIBCXX_3.4'\n",
124
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void*@CXXABI_1.3'\n",
125
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(std::string const&)@GLIBCXX_3.4'\n",
126
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_ostringstream()@GLIBCXX_3.4'\n",
127
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_rebalance_for_erase(std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n",
128
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long@CXXABI_1.3'\n",
129
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n",
130
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_unhook()@GLIBCXX_3.4.15'\n",
131
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
132
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::_M_sync(char*, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
133
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream<char, std::char_traits<char> >::~basic_iostream()@GLIBCXX_3.4'\n",
134
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale(std::locale const&)@GLIBCXX_3.4'\n",
135
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
136
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `log2f@GLIBC_2.2.5'\n",
137
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(std::basic_streambuf<char, std::char_traits<char> >*)@GLIBCXX_3.4'\n",
138
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >@GLIBCXX_3.4'\n",
139
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::exception::~exception()@GLIBCXX_3.4'\n",
140
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_create(unsigned long, unsigned long, std::allocator<char> const&)@GLIBCXX_3.4'\n",
141
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file<char>::is_open() const@GLIBCXX_3.4'\n",
142
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_istringstream()@GLIBCXX_3.4'\n",
143
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::swap(std::string&)@GLIBCXX_3.4'\n",
144
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
145
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf(std::basic_streambuf<char, std::char_traits<char> > const&)@GLIBCXX_3.4'\n",
146
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::init(std::basic_streambuf<char, std::char_traits<char> >*)@GLIBCXX_3.4'\n",
147
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_cast()@GLIBCXX_3.4'\n",
148
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)@GLIBCXX_3.4'\n",
149
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >::operator=(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> > const&)@GLIBCXX_3.4'\n",
150
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long*@CXXABI_1.3'\n",
151
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete(void*)@GLIBCXX_3.4'\n",
152
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(int)@GLIBCXX_3.4'\n",
153
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n",
154
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_M_destroy(std::allocator<char> const&)@GLIBCXX_3.4'\n",
155
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream<wchar_t, std::char_traits<wchar_t> >::~basic_iostream()@GLIBCXX_3.4'\n",
156
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::runtime_error@GLIBCXX_3.4'\n",
157
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ofstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
158
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_insert_and_rebalance(bool, std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n",
159
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_stringstream()@GLIBCXX_3.4'\n",
160
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
161
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<long>(long)@GLIBCXX_3.4.9'\n",
162
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::get()@GLIBCXX_3.4'\n",
163
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long long@CXXABI_1.3'\n",
164
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)@GLIBCXX_3.4'\n",
165
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::out_of_range::~out_of_range()@GLIBCXX_3.4'\n",
166
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::length_error::~length_error()@GLIBCXX_3.4'\n",
167
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)@GLIBCXX_3.4.9'\n",
168
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::invalid_argument::~invalid_argument()@GLIBCXX_3.4'\n",
169
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::swap(std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >&)@GLIBCXX_3.4'\n",
170
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cout@GLIBCXX_3.4'\n",
171
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<unsigned long long>(unsigned long long)@GLIBCXX_3.4.9'\n",
172
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<void const*>(void const*)@GLIBCXX_3.4.9'\n",
173
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::underflow_error@GLIBCXX_3.4'\n",
174
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
175
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::out_of_range@GLIBCXX_3.4'\n",
176
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_allocate_exception@CXXABI_1.3'\n",
177
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios<wchar_t, std::char_traits<wchar_t> >@GLIBCXX_3.4'\n",
178
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void const*@CXXABI_1.3'\n",
179
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<wchar_t, std::char_traits<wchar_t> >::init(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >*)@GLIBCXX_3.4'\n",
180
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::reserve(unsigned long)@GLIBCXX_3.4'\n",
181
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_begin_catch@CXXABI_1.3'\n",
182
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long@CXXABI_1.3'\n",
183
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n",
184
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_leak()@GLIBCXX_3.4'\n",
185
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::open(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n",
186
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_M_sync(wchar_t*, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
187
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::getline(char*, long, char)@GLIBCXX_3.4'\n",
188
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istream<char, std::char_traits<char> >& std::getline<char, std::char_traits<char>, std::allocator<char> >(std::basic_istream<char, std::char_traits<char> >&, std::basic_string<char, std::char_traits<char>, std::allocator<char> >&, char)@GLIBCXX_3.4'\n",
189
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
190
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::~condition_variable()@GLIBCXX_3.4.11'\n",
191
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
192
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::insert(unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n",
193
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(char const*, unsigned long)@GLIBCXX_3.4'\n",
194
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned char@CXXABI_1.3'\n",
195
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::ios_base()@GLIBCXX_3.4'\n",
196
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_out_of_range(char const*)@GLIBCXX_3.4'\n",
197
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::overflow_error::~overflow_error()@GLIBCXX_3.4'\n",
198
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_length_error(char const*)@GLIBCXX_3.4'\n",
199
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_system_error(int)@GLIBCXX_3.4.11'\n",
200
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ofstream<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
201
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<double>(double)@GLIBCXX_3.4.9'\n",
202
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<char, std::char_traits<char> >::operator=(std::basic_streambuf<char, std::char_traits<char> > const&)@GLIBCXX_3.4'\n",
203
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long long@CXXABI_1.3'\n",
204
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, unsigned long, std::allocator<char> const&)@GLIBCXX_3.4'\n",
205
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
206
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_release@CXXABI_1.3'\n",
207
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_throw@CXXABI_1.3'\n",
208
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::underflow_error::~underflow_error()@GLIBCXX_3.4'\n",
209
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n",
210
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::length_error@GLIBCXX_3.4'\n",
211
+ "/mnt/jeff/anaconda/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::~basic_filebuf()@GLIBCXX_3.4'\n",
212
+ "collect2: error: ld returned 1 exit status\n",
213
+ "/mnt/jeff/huggingface/modules/transformers_modules/speech_conformer_encoder.py:2775: FutureWarning: Please specify CheckpointImpl.NO_REENTRANT as CheckpointImpl.REENTRANT will soon be removed as the default and eventually deprecated.\n",
214
+ " lambda i: encoder_checkpoint_wrapper(\n"
215
+ ]
216
+ },
217
+ {
218
+ "data": {
219
+ "application/vnd.jupyter.widget-view+json": {
220
+ "model_id": "b1f758d955e44dce9aac35eed8125134",
221
+ "version_major": 2,
222
+ "version_minor": 0
223
+ },
224
+ "text/plain": [
225
+ "Loading checkpoint shards: 0%| | 0/5 [00:00<?, ?it/s]"
226
+ ]
227
+ },
228
+ "metadata": {},
229
+ "output_type": "display_data"
230
+ }
231
+ ],
232
+ "source": [
233
+ "from io import BytesIO\n",
234
+ "from urllib.request import urlopen\n",
235
+ "import soundfile\n",
236
+ "import torch\n",
237
+ "from datasets import load_dataset, Audio\n",
238
+ "import numpy as np\n",
239
+ "from transformers import AutoModel, AutoProcessor, BatchFeature, AutoConfig\n",
240
+ "from tqdm import tqdm\n",
241
+ "import json\n",
242
+ "import os\n",
243
+ "import time\n",
244
+ "from datetime import datetime\n",
245
+ "import sacrebleu\n",
246
+ "from jiwer import cer, wer\n",
247
+ "from torch.utils.data import Dataset, DataLoader\n",
248
+ "import soundfile as sf\n",
249
+ "import re\n",
250
+ "from pathlib import Path\n",
251
+ "\n",
252
+ "\n",
253
+ "model_id = \"./\"\n",
254
+ "\n",
255
+ "model = AutoModel.from_pretrained(\n",
256
+ " model_id, device_map=\"cpu\", trust_remote_code=True,\n",
257
+ ").eval()"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": 2,
263
+ "metadata": {},
264
+ "outputs": [
265
+ {
266
+ "data": {
267
+ "application/vnd.jupyter.widget-view+json": {
268
+ "model_id": "2d03eb752659479babc4b0095c2cce97",
269
+ "version_major": 2,
270
+ "version_minor": 0
271
+ },
272
+ "text/plain": [
273
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
274
+ ]
275
+ },
276
+ "metadata": {},
277
+ "output_type": "display_data"
278
+ }
279
+ ],
280
+ "source": [
281
+ "from transformers.models.nemotron.modeling_nemotron import NemotronForCausalLM\n",
282
+ "from transformers import AutoModelForCausalLM\n",
283
+ "model_id = '/mnt/jeff/InCar/LlamaNemotronOmni/Llama-3.1-Nemotron-Nano-4B-v1.1'\n",
284
+ "revision = \"main\" #\"v1.0\"\n",
285
+ "model_org = AutoModelForCausalLM.from_pretrained(\n",
286
+ " model_id, device_map=\"cpu\", revision = revision, trust_remote_code=True,\n",
287
+ ").eval()"
288
+ ]
289
+ },
290
+ {
291
+ "cell_type": "code",
292
+ "execution_count": 3,
293
+ "metadata": {},
294
+ "outputs": [
295
+ {
296
+ "data": {
297
+ "text/plain": [
298
+ "(291, 291)"
299
+ ]
300
+ },
301
+ "execution_count": 3,
302
+ "metadata": {},
303
+ "output_type": "execute_result"
304
+ }
305
+ ],
306
+ "source": [
307
+ "keys = []\n",
308
+ "for k in model.state_dict():\n",
309
+ " if 'language_model' in k and not 'lora' in k:\n",
310
+ " keys.append(k)\n",
311
+ "len(model_org.state_dict()),len(keys)"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": 4,
317
+ "metadata": {},
318
+ "outputs": [],
319
+ "source": [
320
+ "new_state_dict = model.state_dict()\n",
321
+ "for k in keys:\n",
322
+ " new_k = k.replace('language_model.model.base_model.','').replace('language_model.','').replace('.base_layer','')\n",
323
+ " if not new_k in model_org.state_dict():\n",
324
+ " print(k)\n",
325
+ " else:\n",
326
+ " new_state_dict[k] = model_org.state_dict()[new_k]"
327
+ ]
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": 6,
332
+ "metadata": {},
333
+ "outputs": [
334
+ {
335
+ "data": {
336
+ "text/plain": [
337
+ "<All keys matched successfully>"
338
+ ]
339
+ },
340
+ "execution_count": 6,
341
+ "metadata": {},
342
+ "output_type": "execute_result"
343
+ }
344
+ ],
345
+ "source": [
346
+ "model.load_state_dict(new_state_dict)"
347
+ ]
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 8,
352
+ "metadata": {},
353
+ "outputs": [
354
+ {
355
+ "name": "stderr",
356
+ "output_type": "stream",
357
+ "text": [
358
+ "/mnt/jeff/huggingface/modules/transformers_modules/gemma-3-4b-it-omni/speech_conformer_encoder.py:2775: FutureWarning: Please specify CheckpointImpl.NO_REENTRANT as CheckpointImpl.REENTRANT will soon be removed as the default and eventually deprecated.\n",
359
+ " lambda i: encoder_checkpoint_wrapper(\n"
360
+ ]
361
+ },
362
+ {
363
+ "name": "stdout",
364
+ "output_type": "stream",
365
+ "text": [
366
+ "######################## speech lora #############\n",
367
+ "######################## text lora #############\n"
368
+ ]
369
+ },
370
+ {
371
+ "data": {
372
+ "application/vnd.jupyter.widget-view+json": {
373
+ "model_id": "3e79659733a542c685c653b825c77cd6",
374
+ "version_major": 2,
375
+ "version_minor": 0
376
+ },
377
+ "text/plain": [
378
+ "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
379
+ ]
380
+ },
381
+ "metadata": {},
382
+ "output_type": "display_data"
383
+ },
384
+ {
385
+ "name": "stderr",
386
+ "output_type": "stream",
387
+ "text": [
388
+ "Some weights of Gemma3OmniForConditionalGeneration were not initialized from the model checkpoint at /mnt/jeff/InCar/Gemma3omni/gemma-3-4b-it-omni and are newly initialized: ['language_model.model.base_model.model.layers.0.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.0.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.0.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.0.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.0.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.0.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.0.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.1.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.1.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.1.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.1.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.1.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.1.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.1.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.10.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.10.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.10.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.10.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.10.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.10.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.10.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.10.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.10.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.10.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.10.self_attn.q_proj.lora_A.text.weight', 
'language_model.model.base_model.model.layers.10.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.10.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.10.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.11.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.11.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.11.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.11.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.11.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.11.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.11.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.12.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.12.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.12.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.12.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.12.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.12.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.12.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.13.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.13.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.13.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.13.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.13.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.13.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.13.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.13.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.13.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.13.self_attn.o_proj.lora_B.text.weight', 
'language_model.model.base_model.model.layers.13.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.13.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.13.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.13.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.14.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.14.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.14.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.14.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.14.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.14.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.14.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.15.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.15.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.15.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.15.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.15.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.15.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.15.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.16.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.16.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.16.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.16.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.16.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.16.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.16.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.16.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.16.self_attn.o_proj.lora_A.text.weight', 
'language_model.model.base_model.model.layers.16.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.16.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.16.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.16.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.16.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.17.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.17.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.17.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.17.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.17.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.17.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.17.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.18.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.18.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.18.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.18.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.18.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.18.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.18.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.19.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.19.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.19.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.19.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.19.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.19.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.19.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.19.self_attn.k_proj.lora_B.text.weight', 
'language_model.model.base_model.model.layers.19.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.19.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.19.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.19.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.19.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.19.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.2.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.2.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.2.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.2.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.2.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.2.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.2.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.20.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.20.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.20.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.20.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.20.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.20.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.20.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.21.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.21.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.21.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.21.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.21.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.21.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.21.self_attn.k_proj.lora_A.text.weight', 
'language_model.model.base_model.model.layers.21.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.21.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.21.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.21.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.21.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.21.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.21.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.22.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.22.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.22.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.22.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.22.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.22.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.22.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.23.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.23.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.23.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.23.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.23.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.23.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.23.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.24.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.24.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.24.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.24.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.24.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.24.mlp.up_proj.lora_B.text.weight', 
'language_model.model.base_model.model.layers.24.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.24.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.24.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.24.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.24.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.24.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.24.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.24.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.25.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.25.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.25.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.25.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.25.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.25.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.25.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.26.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.26.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.26.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.26.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.26.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.26.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.26.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.27.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.27.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.27.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.27.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.27.mlp.up_proj.lora_A.text.weight', 
'language_model.model.base_model.model.layers.27.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.27.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.28.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.28.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.28.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.28.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.28.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.28.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.28.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.29.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.29.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.29.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.29.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.29.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.29.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.29.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.3.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.3.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.3.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.3.mlp.gate_proj.lora_B.text.weight', 
'language_model.model.base_model.model.layers.3.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.3.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.3.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.30.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.30.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.30.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.30.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.30.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.30.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.30.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.31.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.31.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.31.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.31.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.31.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.31.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.31.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.32.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.32.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.32.mlp.gate_proj.lora_A.text.weight', 
'language_model.model.base_model.model.layers.32.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.32.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.32.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.32.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.33.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.33.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.33.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.33.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.33.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.33.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.33.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.4.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.4.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.4.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.4.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.4.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.4.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.4.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.5.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.5.mlp.down_proj.lora_B.text.weight', 
'language_model.model.base_model.model.layers.5.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.5.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.5.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.5.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.5.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.6.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.6.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.6.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.6.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.6.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.6.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.6.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.7.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.7.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.7.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.7.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.7.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.7.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.7.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.8.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.8.mlp.down_proj.lora_B.text.weight', 
'language_model.model.base_model.model.layers.8.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.8.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.8.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.8.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.8.self_attn.v_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.9.mlp.down_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.9.mlp.down_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.9.mlp.gate_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.9.mlp.gate_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.9.mlp.up_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.9.mlp.up_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.k_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.k_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.o_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.o_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.q_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.q_proj.lora_B.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.v_proj.lora_A.text.weight', 'language_model.model.base_model.model.layers.9.self_attn.v_proj.lora_B.text.weight']\n",
389
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
390
+ ]
391
+ }
392
+ ],
393
+ "source": [
394
+ "model_id = \"/mnt/jeff/InCar/Gemma3omni/gemma-3-4b-it-omni\"\n",
395
+ "revision = \"main\" #\"v1.0\"\n",
396
+ "\n",
397
+ "model2 = AutoModel.from_pretrained(\n",
398
+ " model_id, device_map=\"cpu\", revision = revision, trust_remote_code=True\n",
399
+ ").eval()\n"
400
+ ]
401
+ },
402
+ {
403
+ "cell_type": "code",
404
+ "execution_count": 9,
405
+ "metadata": {},
406
+ "outputs": [],
407
+ "source": [
408
+ "model.audio_tower = model2.audio_tower"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "execution_count": 10,
414
+ "metadata": {},
415
+ "outputs": [],
416
+ "source": [
417
+ "tmp = model.audio_projector.state_dict()"
418
+ ]
419
+ },
420
+ {
421
+ "cell_type": "code",
422
+ "execution_count": 11,
423
+ "metadata": {},
424
+ "outputs": [
425
+ {
426
+ "name": "stdout",
427
+ "output_type": "stream",
428
+ "text": [
429
+ "0.weight tensor(0)\n",
430
+ "0.bias tensor(0)\n",
431
+ "2.weight tensor(0)\n",
432
+ "2.bias tensor(0)\n"
433
+ ]
434
+ }
435
+ ],
436
+ "source": [
437
+ "for k in tmp:\n",
438
+ " print(k,torch.sum(torch.isnan(tmp[k])))"
439
+ ]
440
+ },
441
+ {
442
+ "cell_type": "code",
443
+ "execution_count": 4,
444
+ "metadata": {},
445
+ "outputs": [],
446
+ "source": [
447
+ "tmp['0.bias'] = torch.zeros(tmp['0.bias'].shape)"
448
+ ]
449
+ },
450
+ {
451
+ "cell_type": "code",
452
+ "execution_count": 6,
453
+ "metadata": {},
454
+ "outputs": [
455
+ {
456
+ "data": {
457
+ "text/plain": [
458
+ "<All keys matched successfully>"
459
+ ]
460
+ },
461
+ "execution_count": 6,
462
+ "metadata": {},
463
+ "output_type": "execute_result"
464
+ }
465
+ ],
466
+ "source": [
467
+ "model.audio_projector.load_state_dict(tmp)"
468
+ ]
469
+ },
470
+ {
471
+ "cell_type": "code",
472
+ "execution_count": 2,
473
+ "metadata": {},
474
+ "outputs": [
475
+ {
476
+ "name": "stdout",
477
+ "output_type": "stream",
478
+ "text": [
479
+ "######################## speech lora #############\n"
480
+ ]
481
+ }
482
+ ],
483
+ "source": [
484
+ "model.init_lora()"
485
+ ]
486
+ },
487
+ {
488
+ "cell_type": "code",
489
+ "execution_count": 3,
490
+ "metadata": {},
491
+ "outputs": [],
492
+ "source": [
493
+ "model.save_pretrained('../init_nemotron_omni')"
494
+ ]
495
+ },
496
+ {
497
+ "cell_type": "code",
498
+ "execution_count": null,
499
+ "metadata": {},
500
+ "outputs": [],
501
+ "source": []
502
+ }
503
+ ],
504
+ "metadata": {
505
+ "kernelspec": {
506
+ "display_name": "base",
507
+ "language": "python",
508
+ "name": "python3"
509
+ },
510
+ "language_info": {
511
+ "codemirror_mode": {
512
+ "name": "ipython",
513
+ "version": 3
514
+ },
515
+ "file_extension": ".py",
516
+ "mimetype": "text/x-python",
517
+ "name": "python",
518
+ "nbconvert_exporter": "python",
519
+ "pygments_lexer": "ipython3",
520
+ "version": "3.12.7"
521
+ }
522
+ },
523
+ "nbformat": 4,
524
+ "nbformat_minor": 2
525
+ }
config.json ADDED
@@ -0,0 +1,98 @@
1
+ {
2
+ "architectures": [
3
+ "NemotronOmniForConditionalGeneration"
4
+ ],
5
+ "initializer_range": 0.02,
6
+
7
+ "text_config": {
8
+ "attention_bias": false,
9
+ "model_type": "llama_text",
10
+ "attention_dropout": 0.0,
11
+ "bos_token_id": 128000,
12
+ "eos_token_id": 128001,
13
+ "head_dim": 128,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 3072,
16
+ "intermediate_size": 9216,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "num_attention_heads": 32,
20
+ "num_hidden_layers": 32,
21
+ "num_key_value_heads": 8,
22
+ "pretraining_tp": 1,
23
+ "rms_norm_eps": 1e-05,
24
+ "rope_scaling": {
25
+ "factor": 4.0,
26
+ "low_freq_factor": 1.0,
27
+ "high_freq_factor": 4.0,
28
+ "original_max_position_embeddings": 8192,
29
+ "rope_type": "llama3"
30
+ },
31
+ "rope_theta": 3565775107.2609234,
32
+ "tie_word_embeddings": false,
33
+ "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.47.1",
35
+ "use_cache": true,
36
+ "vocab_size": 128256
37
+ },
38
+
39
+ "audio_config": {
40
+ "activation": "swish",
41
+ "activation_checkpointing": {
42
+ "interval": 1,
43
+ "module": "transformer",
44
+ "offload": false
45
+ },
46
+ "attention_dim": 1024,
47
+ "attention_heads": 16,
48
+ "batch_norm": false,
49
+ "bias_in_glu": true,
50
+ "causal": true,
51
+ "chunk_size": -1,
52
+ "cnn_layer_norm": true,
53
+ "conv_activation": "swish",
54
+ "conv_glu_type": "swish",
55
+ "depthwise_multiplier": 1,
56
+ "depthwise_seperable_out_channel": 1024,
57
+ "dropout_rate": 0.0,
58
+ "encoder_embedding_config": {
59
+ "input_size": 80
60
+ },
61
+ "ext_pw_kernel_size": 1,
62
+ "ext_pw_out_channel": 1024,
63
+ "input_layer": "nemo_conv",
64
+ "input_size": 80,
65
+ "kernel_size": 3,
66
+ "left_chunk": 18,
67
+ "linear_units": 1536,
68
+ "nemo_conv_settings": {
69
+ "conv_channels": 1024
70
+ },
71
+ "num_blocks": 24,
72
+ "relative_attention_bias_args": {
73
+ "t5_bias_max_distance": 500,
74
+ "type": "t5"
75
+ },
76
+ "time_reduction": 8
77
+ },
78
+ "speech_lora": {
79
+ "dp": 0.01,
80
+ "layer": "((layers.*self_attn\\.(q|k|v|o)_proj)|(layers.*mlp\\.(gate|up|down)_proj))",
81
+ "lora_alpha": 320,
82
+ "r": 320,
83
+ "use_rslora": true
84
+ },
85
+ "auto_map": {
86
+ "AutoConfig": "configuration_nemotronOmni.NemotronOmniConfig",
87
+ "AutoModel": "modeling_NemotronOmni.NemotronOmniForConditionalGeneration"
88
+ },
89
+ "audio_token_index": 128255,
90
+ "boa_token_index": 128253,
91
+ "eoa_token_index": 128254,
92
+ "eos_token_id": [
93
+ 128001,
94
+ 128008,
95
+ 128009
96
+ ],
97
+ "model_type": "NemotronOmni"
98
+ }
configuration_nemotronOmni.py ADDED
@@ -0,0 +1,151 @@
1
+ from typing import Optional
2
+
3
+ from transformers import AutoConfig, LlamaConfig
4
+ from transformers.configuration_utils import PretrainedConfig
5
+ from transformers.modeling_rope_utils import rope_config_validation
6
+ from transformers.utils import logging
7
+ from transformers.models.siglip import SiglipVisionConfig
8
+
9
+
10
+ logger = logging.get_logger(__name__)
11
+
12
+ class AudioConfig(PretrainedConfig):
13
+ model_type = "nemotron_audio"
14
+
15
+ def __init__(
16
+ self,
17
+ input_size=80,
18
+ attention_dim=1024,
19
+ attention_heads=16,
20
+ num_blocks=24,
21
+ linear_units=1536,
22
+ dropout_rate=0.0,
23
+ kernel_size=3,
24
+ ext_pw_kernel_size=1,
25
+ ext_pw_out_channel=1024,
26
+ depthwise_seperable_out_channel=1024,
27
+ depthwise_multiplier=1,
28
+ activation="swish",
29
+ conv_activation="swish",
30
+ conv_glu_type="swish",
31
+ bias_in_glu=True,
32
+ causal=True,
33
+ batch_norm=False,
34
+ cnn_layer_norm=True,
35
+ time_reduction=8,
36
+ input_layer="nemo_conv",
37
+ nemo_conv_settings=None,
38
+ chunk_size=-1,
39
+ left_chunk=18,
40
+ relative_attention_bias_args=None,
41
+ activation_checkpointing=None,
42
+ encoder_embedding_config=None,
43
+ **kwargs
44
+ ):
45
+ super().__init__(**kwargs)
46
+
47
+ self.input_size = input_size
48
+ self.attention_dim = attention_dim
49
+ self.attention_heads = attention_heads
50
+ self.num_blocks = num_blocks
51
+ self.linear_units = linear_units
52
+ self.dropout_rate = dropout_rate
53
+ self.kernel_size = kernel_size
54
+ self.ext_pw_kernel_size = ext_pw_kernel_size
55
+ self.ext_pw_out_channel = ext_pw_out_channel
56
+ self.depthwise_seperable_out_channel = depthwise_seperable_out_channel
57
+ self.depthwise_multiplier = depthwise_multiplier
58
+ self.activation = activation
59
+ self.conv_activation = conv_activation
60
+ self.conv_glu_type = conv_glu_type
61
+ self.bias_in_glu = bias_in_glu
62
+ self.causal = causal
63
+ self.batch_norm = batch_norm
64
+ self.cnn_layer_norm = cnn_layer_norm
65
+ self.time_reduction = time_reduction
66
+ self.input_layer = input_layer
67
+
68
+ if nemo_conv_settings is None:
69
+ self.nemo_conv_settings = {"conv_channels": 1024}
70
+ else:
71
+ self.nemo_conv_settings = nemo_conv_settings
72
+
73
+ self.chunk_size = chunk_size
74
+ self.left_chunk = left_chunk
75
+
76
+ if relative_attention_bias_args is None:
77
+ self.relative_attention_bias_args = {"type": "t5", "t5_bias_max_distance": 500}
78
+ else:
79
+ self.relative_attention_bias_args = relative_attention_bias_args
80
+
81
+ if activation_checkpointing is None:
82
+ self.activation_checkpointing = {"interval": 1, "module": "transformer", "offload": False}
83
+ else:
84
+ self.activation_checkpointing = activation_checkpointing
85
+
86
+ if encoder_embedding_config is None:
87
+ self.encoder_embedding_config = {"input_size": input_size}
88
+ else:
89
+ self.encoder_embedding_config = encoder_embedding_config
90
+
91
+
92
+ class NemotronOmniConfig(PretrainedConfig):
93
+
94
+ model_type = "NemotronOmni"
95
+ sub_configs = {
96
+ "text_config": LlamaConfig,
97
+ "audio_config": AudioConfig,
98
+ }
99
+
100
+ def __init__(
101
+ self,
102
+ text_config: Optional[LlamaConfig] = None,
103
+ vision_config: Optional[SiglipVisionConfig] = None,
104
+ audio_config: Optional[AudioConfig] = None,
105
+ # mm_tokens_per_image: int = 256,
106
+ # boi_token_index: int = 255_999,
107
+ # eoi_token_index: int = 256_000,
108
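+ # Special token ids: boa_/eoa_token_index delimit an audio segment and audio_token_index is the audio placeholder token.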
+ boa_token_index: int = 128253,
109
+ eoa_token_index: int = 128254,
110
+ # image_token_index: int = 262_144,
111
+ audio_token_index: int = 128255,
112
+ initializer_range: float = 0.02,
113
+ **kwargs,
114
+ ):
115
+ if text_config is None:
116
+ text_config = LlamaConfig()
117
+ logger.info("text_config is None, using default LlamaConfig text config.")
118
+ elif isinstance(text_config, dict):
119
+ text_config = LlamaConfig(**text_config)
120
+
121
+ # if isinstance(vision_config, dict):
122
+ # vision_config = SiglipVisionConfig(**vision_config)
123
+ # else:
124
+ # vision_config = SiglipVisionConfig()
125
+ # logger.info(
126
+ # "vision_config is None or incompatible with Gemma3VisionConfig intialization. Gemma3 will be limited "
127
+ # "to text tasks."
128
+ # )
129
+
130
+ if isinstance(audio_config, dict):
131
+ audio_config = AudioConfig(**audio_config)
132
+ else:
133
+ audio_config = AudioConfig()
134
+ logger.info(
135
+ "audio_config is None or incompatible with AudioConfig initialization. NemotronOmni will be limited "
136
+ "to text tasks."
137
+ )
138
+
139
+ self.text_config = text_config
140
+ # self.vision_config = vision_config
141
+ self.audio_config = audio_config
142
+ # self.mm_tokens_per_image = mm_tokens_per_image
143
+ # self.boi_token_index = boi_token_index
144
+ # self.eoi_token_index = eoi_token_index
145
+ self.boa_token_index = boa_token_index
146
+ self.eoa_token_index = eoa_token_index
147
+ # self.image_token_index = image_token_index
148
+ self.audio_token_index = audio_token_index
149
+ self.initializer_range = initializer_range
150
+
151
+ super().__init__(**kwargs)
eval.py ADDED
@@ -0,0 +1,311 @@
1
+ from io import BytesIO
2
+ from urllib.request import urlopen
3
+ import soundfile
4
+ import torch
5
+ from datasets import load_dataset, Audio
6
+ import numpy as np
7
+ from transformers import AutoModel, AutoProcessor, BatchFeature
8
+ from tqdm import tqdm
9
+ import json
10
+ import os
11
+ import time
12
+ from datetime import datetime
13
+ from whisper_normalizer.english import EnglishTextNormalizer
14
+ from whisper_normalizer.basic import BasicTextNormalizer
15
+ import sacrebleu
16
+ from jiwer import cer, wer
17
+ from torch.utils.data import Dataset, DataLoader
18
+ import soundfile as sf
19
+ import re
20
+ from pathlib import Path
21
+ import opencc
22
+ from ASRDataset import *
23
+ converter = opencc.OpenCC('s2tw.json')
24
+ normalizer = {
25
+ "en_us" : EnglishTextNormalizer(),
26
+ "other" : BasicTextNormalizer()
27
+ }
28
+
29
+ model_id = "/mnt/jeff/InCar/LlamaNemotronOmni/test_nemotron_omni"
30
+ revision = "main" #"v1.0"
31
+
32
+ model = AutoModel.from_pretrained(
33
+ model_id, device_map="cuda", revision = revision, trust_remote_code=True
34
+ ).eval()
35
+
36
+ processor = AutoProcessor.from_pretrained(
37
+ model_id, revision = revision, trust_remote_code=True
38
+ )
39
+ if 'LlamaNemotronOmni' in model_id:
40
+ processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
41
+
42
+ results_dir = f"evaluation_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
43
+ os.makedirs(results_dir, exist_ok=True)
44
+
45
+
46
+ INSTRUCTION = {
47
+ "ast": "Translate the audio to {0}.",
48
+ "asr": "Transcribe the audio clip into text.",
49
+ }
50
+
51
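+ # Collate function for evaluation batches: left-pads input_ids for generation,
+ # right-pads the per-sample audio attention masks, and stacks audio features using the
+ # pad_sequence / cat_with_pad helpers pulled in via the star import from ASRDataset.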
+ def covost_collate_fn_test(batch):
52
+ input_ids_list = []
53
+ input_audio_embeds_list = []
54
+ audio_embed_sizes_list = []
55
+ audio_attention_mask_list = []
56
+ input_modes_list = []
57
+ answer_list = []
58
+ for inputs in batch:
59
+ input_ids_list.append(inputs['input_ids'][0])
60
+ input_audio_embeds_list.append(inputs['input_audio_embeds'])
61
+ audio_embed_sizes_list.append(inputs['audio_embed_sizes'])
62
+ audio_attention_mask_list.append(
63
+ inputs['input_audio_embeds'].new_full((inputs['input_audio_embeds'].size(1),), True, dtype=torch.bool)
64
+ )
65
+ input_modes_list.append(inputs['input_modes'])
66
+ answer_list.append(inputs['answer'])
67
+
68
+ try:
69
+ input_ids = pad_sequence(input_ids_list, padding_side='left', padding_value=0)
70
+ audio_attention_mask = (
71
+ pad_sequence(audio_attention_mask_list, padding_side='right', padding_value=False)
72
+ if len(audio_attention_mask_list) > 1
73
+ else None
74
+ )
75
+ except Exception as e:
76
+ print(e)
77
+ print(input_ids_list)
78
+ print(audio_attention_mask)
79
+ raise
80
+ attention_mask = (input_ids != 0).long()
81
+ input_audio_embeds = cat_with_pad(input_audio_embeds_list, dim=0)
82
+ audio_embed_sizes = torch.cat(audio_embed_sizes_list)
83
+ input_modes = torch.cat(input_modes_list)
84
+
85
+ return BatchFeature(
86
+ {
87
+ 'input_ids': input_ids,
88
+ 'attention_mask': attention_mask,
89
+ 'input_audio_embeds': input_audio_embeds,
90
+ 'audio_embed_sizes': audio_embed_sizes,
91
+ 'audio_attention_mask': audio_attention_mask,
92
+ 'input_modes': input_modes,
93
+ 'answer': answer_list,
94
+ }
95
+ )
96
+
97
+ def save_results(results, dataset_name, task, source_lang, target_lang=None, sample_idx=None):
98
+ filename = f"{task}_{dataset_name}_{source_lang}"
99
+ if target_lang:
100
+ filename += f"_to_{target_lang}"
101
+ if sample_idx is not None:
102
+ filename += f"_sample_{sample_idx}"
103
+
104
+ filepath = os.path.join(results_dir, f"{filename}.json")
105
+
106
+ results["timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
107
+
108
+ with open(filepath, 'w', encoding='utf-8') as f:
109
+ json.dump(results, f, ensure_ascii=False, indent=2)
110
+
111
+ return filepath
112
+
113
+ def evaluate_task(dataset, source_lang, target_lang, num_samples=-1, batch_size = 4, is_asr=True):
114
+ task_type = "asr" if is_asr else "translation"
115
+ eval_lang = source_lang if is_asr else target_lang
116
+ if eval_lang in normalizer:
117
+ eval_normalizer = normalizer[eval_lang]
118
+ else:
119
+ eval_normalizer = normalizer['other']
120
+ sample_results = []
121
+
122
+ if num_samples > 0 and num_samples < len(dataset):
123
+ indices = np.random.choice(len(dataset), num_samples, replace=False)
124
+ dataset = dataset.select(indices)
125
+
126
+ dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=covost_collate_fn_test)
127
+
128
+ evaluated_samples = {}
129
+
130
+ for batch_idx, batch in enumerate(tqdm(dataloader)):
131
+ batch_references = batch.pop("answer")
132
+
133
+ if torch.cuda.is_available():
134
+ try:
135
+ batch = {k: v.to("cuda") for k, v in batch.items()}
136
+ except Exception as e:
137
+ print(f'error moving batch to cuda: {e}')
138
+ break
139
+
140
+ with torch.inference_mode():
141
+ generate_ids = model.generate(**batch,
142
+ max_new_tokens=256,
143
+ #temperature = 1.0, top_p = 0.95, top_k = 64, do_sample=True
144
+ )
145
+
146
+ input_lengths = batch['input_ids'].shape[1]
147
+ generate_ids = generate_ids[:, input_lengths:]
148
+
149
+ batch_predictions = processor.batch_decode(
150
+ generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
151
+ )
152
+
153
+ for i, (reference, prediction) in enumerate(zip(batch_references, batch_predictions)):
154
+ idx = batch_idx * batch_size + i
155
+ sample_result = {
156
+ "id": idx,
157
+ "reference": reference,
158
+ "prediction": converter.convert(prediction)
159
+ }
160
+ sample_results.append(sample_result)
161
+
162
+ if (batch_idx + 1) % 10 == 0:
163
+ temp_results = []
164
+
165
+ for item in sample_results:
166
+ sample_id = item["id"]
167
+
168
+ if sample_id in evaluated_samples:
169
+ temp_item = item.copy()
170
+ temp_item.update(evaluated_samples[sample_id])
171
+ temp_results.append(temp_item)
172
+ else:
173
+ temp_item = item.copy()
174
+ try:
175
+ ref = eval_normalizer(item["reference"])
176
+ pred = eval_normalizer(item["prediction"])
177
+
178
+ # BLEU, WER/CER
179
+ utt_bleu = sacrebleu.sentence_bleu(pred, [ref]).score
180
+ utt_cer = round(cer(re.sub(r"\s+", "", ref), re.sub(r"\s+", "", pred)) * 100, 2)
181
+ utt_wer = round(wer(ref, pred) * 100, 2)
182
+
183
+ metrics = {
184
+ "bleu": utt_bleu,
185
+ "cer": min(100,utt_cer),
186
+ "wer": utt_wer
187
+ }
188
+
189
+ evaluated_samples[sample_id] = metrics
190
+ temp_item.update(metrics)
191
+ except Exception as e:
192
+ print(f"Error evaluating sample {sample_id}: {e}")
193
+ metrics = {
194
+ "bleu": 0,
195
+ "cer": 100,
196
+ "wer": 100,
197
+ "error": str(e)
198
+ }
199
+ evaluated_samples[sample_id] = metrics
200
+ temp_item.update(metrics)
201
+
202
+ temp_results.append(temp_item)
203
+
204
+ partial_results = {
205
+ "task": task_type,
206
+ "source_lang": source_lang,
207
+ "target_lang": target_lang,
208
+ "num_samples": len(temp_results),
209
+ "sample_results": temp_results
210
+ }
211
+ save_results(partial_results, dataset.name, task_type, source_lang, target_lang)
212
+
213
+ for item in sample_results:
214
+ ref = eval_normalizer(item["reference"])
215
+ pred = eval_normalizer(item["prediction"])
216
+
217
+ utt_bleu = sacrebleu.sentence_bleu(pred, [ref]).score
218
+ utt_cer = round(cer(re.sub(r"\s+", "", ref), re.sub(r"\s+", "", pred)) * 100, 2)
219
+ utt_wer = round(wer(ref, pred) * 100, 2)
220
+
221
+ item.update({
222
+ "bleu": utt_bleu,
223
+ "cer": min(100,utt_cer),
224
+ "wer": utt_wer
225
+ })
226
+
227
+ avg_bleu = sum(item["bleu"] for item in sample_results) / len(sample_results)
228
+ avg_cer = sum(item["cer"] for item in sample_results) / len(sample_results)
229
+ avg_wer = sum(item["wer"] for item in sample_results) / len(sample_results)
230
+
231
+ results = {
232
+ "dataset": dataset.name,
233
+ "task": task_type,
234
+ "source_lang": source_lang,
235
+ "target_lang": target_lang,
236
+ "num_samples": len(sample_results),
237
+ "metrics": {
238
+ "bleu": avg_bleu,
239
+ "cer": avg_cer,
240
+ "wer": avg_wer
241
+ },
242
+ "sample_results": sample_results
243
+ }
244
+
245
+ save_results(results, dataset.name, task_type, source_lang, target_lang)
246
+ return results
247
+
248
+
249
+ if __name__ == "__main__":
250
+
251
+ source_languages = [
252
+ ("en_us", "English"),
253
+ ]
254
+
255
+ target_languages = [
256
+ ("zh-TW", "zh-TW"),
257
+ ]
258
+
259
+ num_samples = -1
260
+ batch_size = 32
261
+
262
+ for source_lang, target_lang in zip(source_languages, target_languages):
263
+ print(f"\n===== {source_lang[0]} ASR =====")
264
+
265
+ split = "test"
266
+
267
+ datasets = []
268
+
269
+
270
+
271
+ commonvoice_speech_tw = CommonVoiceDataset(
272
+ processor=processor,
273
+ source_lang="zh-TW",
274
+ split=split
275
+ )
276
+ datasets.append(commonvoice_speech_tw)
277
+ fleurs = FleursDataset(
278
+ processor=processor,
279
+ split=split,
280
+ source_lang="en_us", # English
281
+ mode="asr"
282
+ )
283
+ datasets.append(fleurs)
284
+
285
+ # Libri Speech Clean ASR mode (English -> English text)
286
+ # libri_speech_clean = LibriSpeechDataset(
287
+ # processor=processor,
288
+ # subset="clean",
289
+ # split=split
290
+ # )
291
+ # datasets.append(libri_speech_clean)
292
+
293
+ # # Libri Speech Other ASR mode (English -> English text)
294
+ # libri_speech_other = LibriSpeechDataset(
295
+ # processor=processor,
296
+ # subset="other",
297
+ # split=split
298
+ # )
299
+ # datasets.append(libri_speech_other)
300
+
301
+ # Fleurs ASR mode (English -> English text)
302
+
303
+
304
+ for dataset in datasets:
305
+ # ASR
306
+ asr_results = evaluate_task(dataset, source_lang[0], target_lang[0], num_samples, batch_size=batch_size, is_asr = True)
307
+
308
+ print(f"\n=== {asr_results.get('dataset', 'Dataset')} | {source_lang[0]} ASR===")
309
+ print(f"BLEU: {asr_results.get('metrics', {}).get('bleu', 'N/A')}")
310
+ print(f"WER: {asr_results.get('metrics', {}).get('wer', 'N/A')}")
311
+ print(f"CER: {asr_results.get('metrics', {}).get('cer', 'N/A')}")
generation_config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "transformers_version": "4.47.1"
10
+ }
image_processing.py ADDED
@@ -0,0 +1,112 @@
1
+ from typing import List, Optional, Union
2
+
3
+ from PIL import Image
4
+ import torch
5
+ from transformers.image_processing_base import BatchFeature
6
+ from transformers.image_processing_utils_fast import (BaseImageProcessorFast,
7
+ divide_to_patches)
8
+ from transformers.image_utils import (ChannelDimension, SizeDict,
9
+ get_image_size, make_list_of_images,
10
+ get_image_type, ImageInput, ImageType)
11
+ from transformers.utils import TensorType
12
+
13
+
14
+ def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
15
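+ # Score each candidate tiling grid by aspect-ratio similarity, scaled by the grid's
+ # total tile area relative to the original image area (capped at 0.6); keep the best grid.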
+ best_factor = float('-inf')
16
+ best_ratio = (1, 1)
17
+ area = width * height
18
+ for ratio in target_ratios:
19
+ target_aspect_ratio = ratio[0] / ratio[1]
20
+ factor_based_on_area_n_ratio = min(
21
+ (ratio[0]*ratio[1]*image_size*image_size)/ area, 0.6
22
+ )* min(
23
+ target_aspect_ratio/aspect_ratio, aspect_ratio/target_aspect_ratio)
24
+ if factor_based_on_area_n_ratio > best_factor:
25
+ best_factor = factor_based_on_area_n_ratio
26
+ best_ratio = ratio
27
+ return best_ratio
28
+
29
+
30
+ class LlamaNemotronNanoVLImageProcessor(BaseImageProcessorFast):
31
+ model_input_names = ["pixel_values"]
32
+
33
+ def __init__(self, image_size=512, max_num_tiles=12, use_thumbnail=True, **kwargs):
34
+ super().__init__(**kwargs)
35
+ self.image_size = image_size
36
+ self.max_num_tiles = max_num_tiles
37
+ self.use_thumbnail = use_thumbnail
38
+
39
+ # Based on https://github.com/OpenGVLab/InternVL/blob/c62fa4f7c850165d7386bdc48ac6bc5a6fab0864/internvl_chat/internvl/train/dataset.py#L702
40
+ def dynamic_preprocess(self, image, image_size=448, max_num_tiles=12, use_thumbnail=False):
41
+ orig_height, orig_width = get_image_size(image, channel_dim=ChannelDimension.FIRST)
42
+ aspect_ratio = orig_width / orig_height
43
+
44
+ # calculate the existing image aspect ratio
45
+ target_ratios = set(
46
+ (i, j) for n in range(1, max_num_tiles + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
47
+ i * j <= max_num_tiles and i * j >= 1)
48
+ target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
49
+
50
+ # find the closest aspect ratio to the target
51
+ target_aspect_ratio = find_closest_aspect_ratio(
52
+ aspect_ratio, target_ratios, orig_width, orig_height, image_size)
53
+
54
+ # calculate the target width and height
55
+ target_width = image_size * target_aspect_ratio[0]
56
+ target_height = image_size * target_aspect_ratio[1]
57
+
58
+ resized_img = self.resize(image, SizeDict(height=target_height, width=target_width))
59
+ patches = divide_to_patches(resized_img, image_size)
60
+ if use_thumbnail and len(patches) != 1:
61
+ patches.append(self.resize(image, SizeDict(height=image_size, width=image_size)))
62
+
63
+ return patches
64
+
65
+ def _process_image(
66
+ self,
67
+ image: ImageInput,
68
+ **kwargs,
69
+ ) -> torch.Tensor:
70
+ image_type = get_image_type(image)
71
+ if image_type not in [ImageType.PIL]:
72
+ raise ValueError(f"Unsupported input image type {image_type}. Only PIL images supported")
73
+ image = image.resize((image.width * 2, image.height * 2), Image.BILINEAR)
74
+ return super()._process_image(image, **kwargs)
75
+
76
+ def _preprocess(
77
+ self,
78
+ images: List[torch.Tensor],
79
+ image_size: int = None,
80
+ max_num_tiles: int = None,
81
+ use_thumbnail: bool = None,
82
+ do_rescale: bool = None,
83
+ return_tensors: Optional[Union[str, TensorType]] = None,
84
+ **kwargs,
85
+ ) -> List[torch.Tensor]:
86
+ image_size = image_size if image_size is not None else self.image_size
87
+ max_num_tiles = max_num_tiles if max_num_tiles is not None else self.max_num_tiles
88
+ use_thumbnail = use_thumbnail if use_thumbnail is not None else self.use_thumbnail
89
+ do_rescale = do_rescale if do_rescale is not None else self.do_rescale
90
+
91
+ images = make_list_of_images(images)
92
+
93
+ all_patches = []
94
+ num_patches = []
95
+ for image in images:
96
+ patches = self.dynamic_preprocess(
97
+ image, image_size, max_num_tiles, use_thumbnail
98
+ )
99
+ all_patches.extend(patches)
100
+ num_patches.append(len(patches))
101
+
102
+ pixel_values = torch.stack(all_patches, dim=0)
103
+ pixel_values = self.rescale_and_normalize(
104
+ pixel_values,
105
+ do_rescale,
106
+ self.rescale_factor,
107
+ do_normalize=self.do_normalize,
108
+ image_mean=self.image_mean,
109
+ image_std=self.image_std
110
+ )
111
+
112
+ return BatchFeature(data={"pixel_values": pixel_values, "num_patches": num_patches}, tensor_type=return_tensors)
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a92b30f1db46c461b3cc8925efd5d4dc6aec0d09f8fee187401e883bf6f0ea9
3
+ size 4993077672
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71bdd0cd6cae461ba7480a092286f1ccd0ac633f8fd54df00bf3951945722886
3
+ size 4968834432
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c752aa51ccfb3fa23c533e8e90abe1ae910e0f1ff96143f00e7618414af1ab8
3
+ size 1187810344
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
modeling_NemotronOmni.py ADDED
@@ -0,0 +1,447 @@
1
+ import copy
2
+ from collections.abc import Callable
3
+ from dataclasses import dataclass
4
+ from typing import List, Optional, Tuple, Union
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+
9
+ from transformers.cache_utils import Cache, HybridCache, StaticCache
10
+ from transformers.generation import GenerationMixin
11
+ from transformers.utils.deprecation import deprecate_kwarg
12
+ from transformers import AutoModel, AutoModelForCausalLM
13
+
14
+ from transformers.models.nemotron.modeling_nemotron import NemotronPreTrainedModel
15
+ from transformers.modeling_utils import PreTrainedModel
16
+ from transformers.modeling_outputs import CausalLMOutputWithPast
17
+ from transformers import AutoConfig, AutoModelForCausalLM
18
+ from transformers.utils import (
19
+ add_start_docstrings,
20
+ add_start_docstrings_to_model_forward,
21
+ is_torchdynamo_compiling,
22
+ logging,
23
+ replace_return_docstrings,
24
+ )
25
+ from .configuration_nemotronOmni import NemotronOmniConfig
26
+ from .speech_conformer_encoder import ConformerEncoder
27
+ from enum import Enum
28
+ class InputMode(Enum):
29
+ LANGUAGE = 0
30
+ VISION = 1
31
+ SPEECH = 2
32
+ VISION_SPEECH = 3
33
+ logger = logging.get_logger(__name__)
34
+ _CONFIG_FOR_DOC = "NemotronOmniConfig"
35
+
36
+ class NemotronOmniForConditionalGeneration(PreTrainedModel, GenerationMixin):
37
+ config_class = NemotronOmniConfig
38
+ use_gradient_checkpointing=False
39
+ def __init__(self, config: NemotronOmniConfig):
40
+ super().__init__(config)
41
+ audio_config = config.audio_config.to_diff_dict()
42
+ for item in ['transformers_version', 'model_type', 'torch_dtype']:
43
+ if item in audio_config:
44
+ audio_config.pop(item)
45
+ self.audio_tower = ConformerEncoder(**audio_config)
46
+ self.audio_tower.post_init({})
47
+ self.audio_tower = self.audio_tower.to(dtype=self.dtype)
48
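+ # Audio projector: a two-layer MLP (Linear -> GELU -> Linear) that maps the Conformer
+ # encoder's attention_dim into the language model's hidden size.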
+ self.audio_projector = nn.Sequential(
49
+ nn.Linear(in_features=config.audio_config.attention_dim, out_features=config.text_config.hidden_size, bias=True),
50
+ nn.GELU(approximate='none'),
51
+ nn.Linear(in_features=config.text_config.hidden_size, out_features=config.text_config.hidden_size, bias=True)
52
+ ).to(dtype=self.dtype)
53
+
54
+ self.vocab_size = config.text_config.vocab_size
55
+
56
+ language_model = AutoModelForCausalLM.from_config(config=config.text_config)
57
+
58
+ if language_model._tied_weights_keys is not None:
59
+ self._tied_weights_keys = [f"language_model.{k}" for k in language_model._tied_weights_keys]
60
+ self.language_model = language_model
61
+
62
+ self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
63
+ self.init_lora()
64
+ self.post_init()
65
+
66
+
67
+ def init_lora(self):
68
+ from peft import LoraConfig, get_peft_model
69
+ import warnings
70
+ print('######################## speech lora #############')
71
+ speech_lora_config = LoraConfig(
72
+ r=self.config.speech_lora['r'],
73
+ lora_alpha=self.config.speech_lora['lora_alpha'],
74
+ target_modules=self.config.speech_lora['layer'],
75
+ use_rslora=self.config.speech_lora['use_rslora'],
76
+ lora_dropout=self.config.speech_lora['dp'],
77
+ task_type="CAUSAL_LM",
78
+ )
79
+ self.language_model.model = get_peft_model(self.language_model.model, speech_lora_config, adapter_name="speech")
80
+ # print('######################## text lora #############')
81
+ # text_lora_config = LoraConfig(
82
+ # r=self.config.text_lora['r'],
83
+ # lora_alpha=self.config.text_lora['lora_alpha'],
84
+ # target_modules=self.config.text_lora['layer'],
85
+ # use_rslora=self.config.text_lora['use_rslora'],
86
+ # lora_dropout=self.config.text_lora['dp'],
87
+ # task_type="CAUSAL_LM",
88
+ # )
89
+ # self.language_model.model.base_model.active_adapter.append("text")
90
+ # self.language_model.model.add_adapter("text", text_lora_config)
91
+
92
+ def set_lora_adapter(self, adapter_name) -> None:
93
+ import warnings
+ from peft.tuners.lora.layer import LoraLayer
94
+ for module in self.modules():
95
+ if isinstance(module, LoraLayer):
96
+ if module.merged:
97
+ warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
98
+ module.unmerge()
99
+ module.set_adapter(adapter_name)
100
+ module._disable_adapters = False
101
+
102
+ def unset_lora_adapter(self) -> None:
103
+ # Ref: peft/tuners/tuners_utils.py - enable_adapters()
104
+ # Ref: peft/tuners/lora/layer.py
105
+ from peft.tuners.lora.layer import LoraLayer
106
+ for module in self.modules():
107
+ if isinstance(module, LoraLayer):
108
+ # disable grads on all adapter layers
109
+ # TODO weijian: may use enable_adapters() instead
110
+ for layer_name in module.adapter_layer_names:
111
+ layer = getattr(module, layer_name)
112
+ layer.requires_grad_(False)
113
+ module._disable_adapters = True
114
+ def get_input_embeddings(self):
115
+ return self.language_model.model.embed_tokens
116
+
117
+ def set_input_embeddings(self, value):
118
+ self.language_model.model.embed_tokens = value
119
+
120
+ def get_output_embeddings(self):
121
+ return self.language_model.lm_head
122
+
123
+ def set_output_embeddings(self, new_embeddings):
124
+ self.language_model.lm_head = new_embeddings
125
+
126
+ def set_decoder(self, decoder):
127
+ self.language_model.model = decoder
128
+
129
+ def get_decoder(self):
130
+ return self.language_model.model
131
+
132
+ def _update_causal_mask(
133
+ self,
134
+ attention_mask,
135
+ token_type_ids,
136
+ past_key_values,
137
+ cache_position,
138
+ input_tensor,
139
+ is_training: bool = False,
140
+ ):
141
+ if self.config.text_config._attn_implementation == "flash_attention_2":
142
+ return attention_mask
143
+
144
+ if attention_mask is not None and attention_mask.dim() == 4:
145
+ # In this case we assume that the mask comes already in inverted
146
+ # form and requires no inversion or slicing.
147
+ return attention_mask
148
+
149
+ using_static_cache = isinstance(past_key_values, StaticCache)
150
+ min_dtype = torch.finfo(self.dtype).min
151
+ inputs_lead_dim, sequence_length = input_tensor.shape[:2]
152
+ if using_static_cache:
153
+ target_length = past_key_values.get_max_cache_shape()
154
+ elif isinstance(past_key_values, HybridCache):
155
+ target_length = past_key_values.get_max_cache_shape()
156
+ else:
157
+ target_length = (
158
+ attention_mask.shape[-1]
159
+ if isinstance(attention_mask, torch.Tensor)
160
+ else cache_position[0] + sequence_length + 1
161
+ )
162
+
163
+ if attention_mask is not None and attention_mask.dim() == 4:
164
+ # In this case we assume that the mask comes already in inverted form and requires no inversion or slicing.
165
+ return attention_mask
166
+
167
+ causal_mask = torch.full(
168
+ (sequence_length, target_length), fill_value=min_dtype, dtype=self.dtype, device=cache_position.device
169
+ )
170
+
171
+ # Causal diagonal mask only if training, otherwise attend to the whole prefix. Training-specific attn for prefix is handled below
172
+ if sequence_length != 1:
173
+ causal_mask = torch.triu(causal_mask, diagonal=1)
174
+
175
+ causal_mask *= torch.arange(target_length, device=cache_position.device) > cache_position.reshape(-1, 1)
176
+ causal_mask = causal_mask[None, None, :, :].expand(inputs_lead_dim, 1, -1, -1)
177
+
178
+ # Apply bidirectional mask on images if token type ids are provided
179
+ if token_type_ids is not None and sequence_length != 1:
180
+ token_type_mask = token_type_ids.unsqueeze(1) == token_type_ids.unsqueeze(2)
181
+ token_type_mask[token_type_ids == 0] = False # if text token do not change anything
182
+ token_type_mask = token_type_mask.unsqueeze(1).to(causal_mask.device, dtype=torch.bool)
183
+ causal_mask = causal_mask.clone()
184
+ causal_mask[:, :, :, :sequence_length] = causal_mask[:, :, :, :sequence_length].masked_fill(
185
+ token_type_mask, 0.0
186
+ )
187
+
188
+ if attention_mask is not None:
189
+ causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
190
+ mask_length = attention_mask.shape[-1]
191
+
192
+ # Then apply padding mask (will mask pad tokens)
193
+ padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(causal_mask.device)
194
+ padding_mask = padding_mask == 0
195
+ causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
196
+ padding_mask, min_dtype
197
+ )
198
+
199
+ return causal_mask
200
+
201
+ def get_audio_features(self, input_audio_embeds: torch.FloatTensor, audio_attention_mask: torch.FloatTensor, audio_embed_sizes: torch.FloatTensor):
202
+ """
203
+ Projects the last hidden state from the audio model into language model space.
204
+
205
+ Args:
206
+ input_audio_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, feature_dim)`):
207
+ The tensors corresponding to the input audio features.
208
+
209
+ Returns:
210
+ audio_features (`torch.Tensor`): Audio feature tensor of shape `(batch_size, audio_length, embed_dim)`.
211
+ """
212
+
213
+ audio_features, masks = self.audio_tower(input_audio_embeds, audio_attention_mask)
214
+ audio_outputs = self.audio_projector(audio_features)
215
+ return audio_outputs
216
+
217
+ def forward(
218
+ self,
219
+ input_ids: Optional[torch.LongTensor] = None,
220
+ pixel_values: Optional[torch.FloatTensor] = None,
221
+ input_audio_embeds: torch.FloatTensor = None,
222
+ audio_embed_sizes: torch.FloatTensor = None,
223
+ audio_attention_mask: torch.FloatTensor = None,
224
+ attention_mask: Optional[torch.Tensor] = None,
225
+ input_modes: torch.LongTensor = None,
226
+ position_ids: Optional[torch.LongTensor] = None,
227
+ past_key_values: Optional[Union[List[torch.FloatTensor], Cache]] = None,
228
+ token_type_ids: Optional[torch.LongTensor] = None,
229
+ cache_position: Optional[torch.LongTensor] = None,
230
+ inputs_embeds: Optional[torch.FloatTensor] = None,
231
+ labels: Optional[torch.LongTensor] = None,
232
+ use_cache: Optional[bool] = None,
233
+ output_attentions: Optional[bool] = None,
234
+ output_hidden_states: Optional[bool] = None,
235
+ return_dict: Optional[bool] = None,
236
+ logits_to_keep: Union[int, torch.Tensor] = 0,
237
+ **lm_kwargs,
238
+ ) -> Union[Tuple, CausalLMOutputWithPast]:
239
+
240
+ if (input_ids is None) ^ (inputs_embeds is not None):
241
+ raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
242
+
243
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
244
+ output_hidden_states = (
245
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
246
+ )
247
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
248
+
249
+ if isinstance(input_modes, torch.Tensor):
250
+ # len(input_mode) == num_beams in beam search, and all elements of input_mode should have the same value
251
+ input_modes = input_modes.unique()
252
+ if len(input_modes) != 1:
253
+ raise ValueError("Elements of input_modes should have the same value")
254
+
255
+ input_mode = InputMode(input_modes.item())
256
+
257
+ if input_mode in [InputMode.VISION_SPEECH, InputMode.VISION]:
258
+ self.unset_lora_adapter()
259
+ #self.set_lora_adapter('vision')
260
+ #audio_projection_mode = 'vision'
261
+ elif input_mode == InputMode.SPEECH:
262
+ self.unset_lora_adapter()
263
+ self.set_lora_adapter('speech')
264
+ #audio_projection_mode = 'speech'
265
+ elif input_mode == InputMode.LANGUAGE:
266
+ self.unset_lora_adapter()
267
+ # self.set_lora_adapter('text')
268
+ else:
269
+ raise ValueError(f"Invalid input_mode: {input_mode}")
270
+
271
+ is_training = token_type_ids is not None and labels is not None
272
+
273
+ # Replace the audio token id with PAD if it is OOV, to avoid index errors
274
+ if input_ids is not None and self.config.audio_token_index >= self.vocab_size:
275
+ special_audio_mask = input_ids == self.config.audio_token_index
276
+ llm_input_ids = input_ids.clone()
277
+ llm_input_ids[special_audio_mask] = 0
278
+ else:
279
+ llm_input_ids = input_ids
280
+
281
+ if inputs_embeds is None:
282
+ inputs_embeds = self.language_model.model.embed_tokens(input_ids)
283
+ inputs_embeds = inputs_embeds.to(dtype=self.dtype)
284
+ if cache_position is None:
285
+ past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
286
+ cache_position = torch.arange(
287
+ past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
288
+ )
289
+
290
+ if position_ids is None:
291
+ position_ids = cache_position.unsqueeze(0) + 1 # Gemma3 positions are 1-indexed
292
+ # Merge text and audios
293
+ if input_audio_embeds is not None:
294
+ input_audio_embeds=input_audio_embeds.to(inputs_embeds.device, inputs_embeds.dtype)
295
+ if audio_attention_mask is not None:
296
+ audio_attention_mask=audio_attention_mask.to(inputs_embeds.device, inputs_embeds.dtype)
297
+ audio_features = self.get_audio_features(input_audio_embeds, audio_attention_mask, audio_embed_sizes)
298
+ if input_ids is None:
299
+ special_audio_mask = inputs_embeds == self.get_input_embeddings()(
300
+ torch.tensor(self.config.audio_token_index, dtype=torch.long, device=inputs_embeds.device)
301
+ )
302
+ else:
303
+ special_audio_mask = (input_ids == self.config.audio_token_index).unsqueeze(-1)
304
+ special_audio_mask = special_audio_mask.expand_as(inputs_embeds).to(inputs_embeds.device)
305
+ masked_audio_features = []
306
+ for i, size in enumerate(audio_embed_sizes):
307
+ masked_audio_features.append(audio_features[i, :size, :])
308
+ masked_audio_features = torch.cat(masked_audio_features, dim=0)
309
+ if not is_torchdynamo_compiling() and inputs_embeds[special_audio_mask].numel() != masked_audio_features.numel():
310
+ audio_tokens_in_text = (special_audio_mask).sum(dim=1).sum(dim=0)[0]
311
+ masked_audio_size = audio_embed_sizes#.sum()[0]
312
+ raise ValueError(
313
+ f"Number of audio input features does not match the number of special audio tokens in the input text. "
314
+ f"Got {audio_tokens_in_text} audio tokens in the text but {masked_audio_size} "
315
+ "tokens from audio embeddings. "
316
+ f"{masked_audio_features.numel()} \n"
317
+ f"{inputs_embeds[special_audio_mask].numel()} \n"
318
+ f"{audio_features} \n"
319
+ f"{inputs_embeds[special_audio_mask]} \n"
320
+ f"{special_audio_mask} \n"
321
+ )
322
+ masked_audio_features = masked_audio_features.to(inputs_embeds.device, inputs_embeds.dtype)
323
+ inputs_embeds = inputs_embeds.masked_scatter(special_audio_mask, masked_audio_features)
324
+ # mask out pad-token-ids in labels for BC
325
+ if labels is not None and self.pad_token_id in labels:
326
+ logger.warning_once(
327
+ "`labels` contains `pad_token_id` which will be masked with `config.ignore_index`. "
328
+ "You have to mask out `pad_token_id` when preparing `labels`, this behavior will be removed in v.4.46.",
329
+ )
330
+ labels = torch.where(input_ids == self.pad_token_id, self.config.ignore_index, labels)
331
+
332
+ causal_mask = self._update_causal_mask(
333
+ attention_mask, token_type_ids, past_key_values, cache_position, inputs_embeds, is_training
334
+ )
335
+
336
+ outputs = self.language_model(
337
+ attention_mask=causal_mask,
338
+ position_ids=position_ids,
339
+ past_key_values=past_key_values,
340
+ inputs_embeds=inputs_embeds,
341
+ use_cache=use_cache,
342
+ output_attentions=output_attentions,
343
+ output_hidden_states=output_hidden_states,
344
+ return_dict=return_dict,
345
+ cache_position=cache_position,
346
+ logits_to_keep=logits_to_keep,
347
+ **lm_kwargs,
348
+ )
349
+ logits = outputs.logits
350
+ loss = None
351
+ # print('#############################')
352
+ # print(logits)
353
+ if labels is not None:
354
+ # Upcast to float if we need to compute the loss to avoid potential precision issues
355
+ logits = logits.float()
356
+ shift_logits = logits[..., :-1, :]
357
+ shift_labels = labels[..., 1:]
358
+ if attention_mask is not None:
359
+ # we use the input attention mask to shift the logits and labels, because it is 2D.
360
+ # we also crop attn mask in case it is longer, which happens in PrefixTuning with peft
361
+ shift_attention_mask = attention_mask[:, -shift_logits.shape[1] :].to(logits.device)
362
+ shift_logits = shift_logits[shift_attention_mask.to(logits.device) != 0].contiguous()
363
+ shift_labels = shift_labels[shift_attention_mask.to(shift_labels.device) != 0].contiguous()
364
+ else:
365
+ shift_logits = shift_logits.contiguous()
366
+ shift_labels = shift_labels.contiguous()
367
+ # Flatten the tokens
368
+ loss_fct = nn.CrossEntropyLoss()
369
+
370
+ flat_logits = shift_logits.view(-1, self.config.text_config.vocab_size)
371
+ flat_labels = shift_labels.view(-1).to(shift_logits.device)
372
+ loss = loss_fct(flat_logits, flat_labels)
373
+ # print('flat logits',flat_logits)
374
+ # print(flat_labels)
375
+ # print(loss)
376
+ if not return_dict:
377
+ output = (logits,) + outputs[1:]
378
+ return (loss,) + output if loss is not None else output
379
+
380
+ return CausalLMOutputWithPast(
381
+ loss=loss,
382
+ logits=logits,
383
+ past_key_values=outputs.past_key_values,
384
+ hidden_states=outputs.hidden_states,
385
+ attentions=outputs.attentions,
386
+ # image_hidden_states=image_features if pixel_values is not None else None,
387
+ # audio_hidden_states=audio_features if input_audio_embeds is not None else None,
388
+ )
389
+
390
+ def prepare_inputs_for_generation(
391
+ self,
392
+ input_ids,
393
+ past_key_values=None,
394
+ input_modes=None,
395
+ inputs_embeds=None,
396
+ cache_position=None,
397
+ position_ids=None,
398
+ pixel_values=None,
399
+ input_audio_embeds=None,
400
+ audio_embed_sizes=None,
401
+ audio_attention_mask=None,
402
+ attention_mask=None,
403
+ token_type_ids=None,
404
+ use_cache=True,
405
+ logits_to_keep=None,
406
+ labels=None,
407
+ **kwargs,
408
+ ):
409
+ # Overwritten -- custom `position_ids` and `pixel_values` handling
410
+ model_inputs = self.language_model.prepare_inputs_for_generation(
411
+ input_ids,
412
+ past_key_values=past_key_values,
413
+ input_modes=input_modes,
414
+ inputs_embeds=inputs_embeds,
415
+ attention_mask=attention_mask,
416
+ position_ids=position_ids,
417
+ cache_position=cache_position,
418
+ use_cache=use_cache,
419
+ logits_to_keep=logits_to_keep,
420
+ token_type_ids=token_type_ids,
421
+ **kwargs,
422
+ )
423
+
424
+ # position_ids in Gemma3 are 1-indexed
425
+ if model_inputs.get("position_ids") is not None:
426
+ model_inputs["position_ids"] += 1
427
+ # If we're in cached decoding stage, pixel values should be None because input ids do not contain special image token anymore
428
+ # Otherwise we need pixel values to be passed to model. NOTE: use_cache=False needs pixel_values always
429
+ if cache_position[0] == 0:
430
+ model_inputs["pixel_values"] = pixel_values
431
+ model_inputs["input_audio_embeds"] = input_audio_embeds
432
+ model_inputs["audio_embed_sizes"] = audio_embed_sizes
433
+ model_inputs["audio_attention_mask"] = audio_attention_mask
434
+ model_inputs["input_modes"] = input_modes
435
+ is_training = token_type_ids is not None and labels is not None
436
+ if cache_position[0] == 0 and isinstance(past_key_values, HybridCache):
437
+ input_tensor = inputs_embeds if inputs_embeds is not None else input_ids
438
+ causal_mask = self._update_causal_mask(
439
+ attention_mask, token_type_ids, past_key_values, cache_position, input_tensor, is_training
440
+ )
441
+ model_inputs["attention_mask"] = causal_mask
442
+
443
+ return model_inputs
444
+
445
+ def tie_weights(self):
446
+ return self.language_model.tie_weights()
447
+
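
A note on the merge step in `forward` above: once the Conformer features are projected, they are spliced into the text embedding sequence by building a boolean mask over the `<audio_soft_token>` positions and calling `masked_scatter`. The sketch below reproduces just that step with dummy tensors; the hidden size and id sequence are illustrative assumptions, only the audio token id (128255) comes from the processor.

```python
import torch

hidden_size, audio_token_id = 16, 128255  # illustrative hidden size; the real value comes from text_config
input_ids = torch.tensor([[1, audio_token_id, audio_token_id, audio_token_id, 2, 3]])
inputs_embeds = torch.randn(1, 6, hidden_size)

# Projected audio features: one row per <audio_soft_token> in the prompt.
audio_features = torch.randn(1, 3, hidden_size)
audio_embed_sizes = torch.tensor([3])

# Same logic as the forward pass: mask the audio positions, then scatter the features in.
special_audio_mask = (input_ids == audio_token_id).unsqueeze(-1).expand_as(inputs_embeds)
masked_audio_features = torch.cat(
    [audio_features[i, :size, :] for i, size in enumerate(audio_embed_sizes)], dim=0
)
merged = inputs_embeds.masked_scatter(special_audio_mask, masked_audio_features)
print(merged.shape)  # torch.Size([1, 6, 16])
```

This is also why the number of `<audio_soft_token>`s in the prompt must equal the summed `audio_embed_sizes`; the `ValueError` raised in `forward` is exactly that consistency check.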
preprocessing_NemotronOmni.py ADDED
@@ -0,0 +1,369 @@
1
+ import re
2
+ from typing import List, Optional, Union, Tuple
3
+ from math import ceil
4
+
5
+ import numpy as np
6
+ import torch
7
+ import scipy
8
+ from torch.nn.utils.rnn import pad_sequence
9
+
10
+ from enum import Enum
11
+
12
+ from transformers import AutoFeatureExtractor
13
+ from transformers.feature_extraction_utils import BatchFeature
14
+ from transformers.feature_extraction_sequence_utils import SequenceFeatureExtractor
15
+ from transformers.image_utils import ImageInput, make_nested_list_of_images
16
+ from transformers.processing_utils import ImagesKwargs, ProcessingKwargs, ProcessorMixin, Unpack, AudioKwargs
17
+ from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
18
+ from transformers.utils import to_py_obj, TensorType
19
+ from transformers.audio_utils import AudioInput
20
+
21
+
22
+
23
+
24
+ def speechlib_mel(sample_rate, n_fft, n_mels, fmin=None, fmax=None):
25
+ """Create a Mel filter-bank the same as SpeechLib FbankFC.
26
+
27
+ Args:
28
+ sample_rate (int): Sample rate in Hz. number > 0 [scalar]
29
+ n_fft (int): FFT size. int > 0 [scalar]
30
+ n_mels (int): Mel filter size. int > 0 [scalar]
31
+ fmin (float): lowest frequency (in Hz). If None use 0.0.
32
+ float >= 0 [scalar]
33
+ fmax: highest frequency (in Hz). If None use sample_rate / 2.
34
+ float >= 0 [scalar]
35
+
36
+ Returns
37
+ out (numpy.ndarray): Mel transform matrix
38
+ [shape=(n_mels, 1 + n_fft/2)]
39
+ """
40
+
41
+ bank_width = int(n_fft // 2 + 1)
42
+ if fmax is None:
43
+ fmax = sample_rate / 2
44
+ if fmin is None:
45
+ fmin = 0
46
+ assert fmin >= 0, "fmin cannot be negative"
47
+ assert fmin < fmax <= sample_rate / 2, "fmax must be between (fmin, samplerate / 2]"
48
+
49
+ def mel(f):
50
+ return 1127.0 * np.log(1.0 + f / 700.0)
51
+
52
+ def bin2mel(fft_bin):
53
+ return 1127.0 * np.log(1.0 + fft_bin * sample_rate / (n_fft * 700.0))
54
+
55
+ def f2bin(f):
56
+ return int((f * n_fft / sample_rate) + 0.5)
57
+
58
+ # Spec 1: FFT bin range [f2bin(fmin) + 1, f2bin(fmax) - 1]
59
+ klo = f2bin(fmin) + 1
60
+ khi = f2bin(fmax)
61
+
62
+ khi = max(khi, klo)
63
+
64
+ # Spec 2: SpeechLib uses triangles in Mel space
65
+ mlo = mel(fmin)
66
+ mhi = mel(fmax)
67
+ m_centers = np.linspace(mlo, mhi, n_mels + 2)
68
+ ms = (mhi - mlo) / (n_mels + 1)
69
+
70
+ matrix = np.zeros((n_mels, bank_width), dtype=np.float32)
71
+ for m in range(0, n_mels):
72
+ left = m_centers[m]
73
+ center = m_centers[m + 1]
74
+ right = m_centers[m + 2]
75
+ for fft_bin in range(klo, khi):
76
+ mbin = bin2mel(fft_bin)
77
+ if left < mbin < right:
78
+ matrix[m, fft_bin] = 1.0 - abs(center - mbin) / ms
79
+
80
+ return matrix
81
+
82
+
83
+ class NemotronAudioFeatureExtractor(SequenceFeatureExtractor):
84
+ model_input_names = ["input_audio_embeds", "audio_embed_sizes", "audio_attention_mask"]
85
+
86
+ def __init__(self, audio_compression_rate=8,
87
+ audio_downsample_rate=1,
88
+ audio_feat_stride=1,
89
+ feature_size = 80,
90
+ sampling_rate = 16000,
91
+ padding_value = 0.0,
92
+ **kwargs):
93
+
94
+ super().__init__(feature_size=feature_size,
95
+ sampling_rate=sampling_rate,
96
+ padding_value=padding_value, **kwargs)
97
+
98
+ self.compression_rate = audio_compression_rate
99
+ self.qformer_compression_rate = audio_downsample_rate
100
+ self.feat_stride = audio_feat_stride
101
+
102
+ self._eightk_method = "fillzero"
103
+ self._mel = speechlib_mel(self.sampling_rate, 512, self.feature_size, fmin=None, fmax=self.sampling_rate//2-self.feature_size-230).T
104
+
105
+ self._hamming400 = np.hamming(400) # for 16k audio
106
+ self._hamming200 = np.hamming(200) # for 8k audio
107
+
108
+ def duration_to_frames(self, duration):
109
+ """Estimate the number of feature frames (10 ms frame rate) for a clip of `duration` seconds."""
110
+ frame_rate = 10
111
+
112
+ num_frames = duration * 1000 // frame_rate
113
+ return num_frames
114
+
115
+ def __call__(
116
+ self,
117
+ audios: List[AudioInput],
118
+ sampling_rate = 16000,
119
+ return_attention_mask=True,
120
+ padding="max_length",
121
+ return_tensors: Optional[Union[str, TensorType]] = None,
122
+ ):
123
+ # Ref: https://github.com/huggingface/transformers/blob/v4.47.0/src/transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py#L161
124
+ returned_input_audio_embeds = []
125
+ returned_audio_embed_sizes = []
126
+ audio_frames_list = []
127
+
128
+ for audio_data in audios:
129
+ audio_embeds = self._extract_features(audio_data, sampling_rate)
130
+ audio_frames = len(audio_embeds) * self.feat_stride
131
+ audio_embed_size = self._compute_audio_embed_size(audio_frames)
132
+
133
+ returned_input_audio_embeds.append(torch.tensor(audio_embeds))
134
+ returned_audio_embed_sizes.append(torch.tensor(audio_embed_size).long())
135
+ audio_frames_list.append(audio_frames)
136
+
137
+ returned_input_audio_embeds = pad_sequence(
138
+ returned_input_audio_embeds, batch_first=True
139
+ )
140
+ returned_audio_embed_sizes = torch.stack(returned_audio_embed_sizes, dim=0)
141
+ audio_frames = torch.tensor(audio_frames_list)
142
+ returned_audio_attention_mask = torch.arange(0, audio_frames.max()).unsqueeze(0) < audio_frames.unsqueeze(1) if len(audios) > 1 else None
143
+
144
+ data = {
145
+ "input_audio_embeds": returned_input_audio_embeds,
146
+ "audio_embed_sizes": returned_audio_embed_sizes,
147
+ }
148
+ if returned_audio_attention_mask is not None and return_attention_mask:
149
+ data["audio_attention_mask"] = returned_audio_attention_mask
150
+
151
+ return BatchFeature(data=data, tensor_type=return_tensors)
152
+
153
+ def _extract_spectrogram(self, wav, fs):
154
+ """Extract spectrogram features from waveform.
155
+ Args:
156
+ wav (1D array): waveform of the input
157
+ fs (int): sampling rate of the waveform, 16000 or 8000.
158
+ If fs=8000, the waveform will be resampled to 16000Hz.
159
+ Output:
160
+ log_fbank (2D array): a TxD matrix of log Mel filterbank features.
161
+ D=80, and T is the number of frames.
162
+ """
163
+ if wav.ndim > 1:
164
+ wav = np.squeeze(wav)
165
+
166
+ # by default, we extract the mean if stereo
167
+ if len(wav.shape) == 2:
168
+ wav = wav.mean(1)
169
+
170
+ # Resample to 16000 or 8000 if needed
171
+ if fs > 16000:
172
+ wav = scipy.signal.resample_poly(wav, 1, fs // 16000)
173
+ fs = 16000
174
+ elif 8000 < fs < 16000:
175
+ wav = scipy.signal.resample_poly(wav, 1, fs // 8000)
176
+ fs = 8000
177
+ elif fs < 8000:
178
+ raise RuntimeError(f"Unsupported sample rate {fs}")
179
+
180
+ if fs == 8000:
181
+ if self._eightk_method == "resample":
182
+ # Input audio is 8 kHz. Convert to 16 kHz before feature
183
+ # extraction
184
+ wav = scipy.signal.resample_poly(wav, 2, 1)
185
+ fs = 16000
186
+ # Do nothing here for fillzero method
187
+ elif fs != 16000:
188
+ # Input audio is not a supported sample rate.
189
+ raise RuntimeError(f"Input data using an unsupported sample rate: {fs}")
190
+
191
+ preemphasis = 0.97
192
+
193
+ if fs == 8000:
194
+ n_fft = 256
195
+ win_length = 200
196
+ hop_length = 80
197
+ fft_window = self._hamming200
198
+ elif fs == 16000:
199
+ n_fft = 512
200
+ win_length = 400
201
+ hop_length = 160
202
+ fft_window = self._hamming400
203
+
204
+ # Spec 1: SpeechLib cut remaining sample insufficient for a hop
205
+ n_batch = (wav.shape[0] - win_length) // hop_length + 1
206
+ # Here we don't use stride_tricks since the input array may not satisfy
207
+ # memory layout requirement and we need writeable output
208
+ # Here we only use a list of views before copying to the destination
209
+ # so it is more efficient than broadcasting
210
+ y_frames = np.array(
211
+ [wav[_stride : _stride + win_length] for _stride in range(0, hop_length * n_batch, hop_length)],
212
+ dtype=np.float32,
213
+ )
214
+
215
+ # Spec 2: SpeechLib applies preemphasis within each batch
216
+ y_frames_prev = np.roll(y_frames, 1, axis=1)
217
+ y_frames_prev[:, 0] = y_frames_prev[:, 1]
218
+ y_frames = (y_frames - preemphasis * y_frames_prev) * 32768
219
+
220
+ S = np.fft.rfft(fft_window * y_frames, n=n_fft, axis=1).astype(np.complex64)
221
+
222
+ if fs == 8000:
223
+ # Need to pad the output to look like 16 kHz data but with zeros in
224
+ # the 4 to 8 kHz bins.
225
+ frames, bins = S.shape
226
+ padarray = np.zeros((frames, bins))
227
+ S = np.concatenate((S[:, 0:-1], padarray), axis=1) # Nyquist bin gets set to zero
228
+
229
+ spec = np.abs(S).astype(np.float32)
230
+ return spec
231
+
232
+ def _extract_features(self, wav, fs):
233
+ """Extract log filterbank features from waveform.
234
+ Args:
235
+ wav (1D array): waveform of the input
236
+ fs (int): sampling rate of the waveform, 16000 or 8000.
237
+ If fs=8000, the waveform will be resampled to 16000Hz.
238
+ Output:
239
+ log_fbank (2D array): a TxD matrix of log Mel filterbank features.
240
+ D=80, and T is the number of frames.
241
+ """
242
+ spec = self._extract_spectrogram(wav, fs)
243
+ spec_power = spec**2
244
+
245
+ fbank_power = np.clip(spec_power.dot(self._mel), 1.0, None)
246
+ log_fbank = np.log(fbank_power).astype(np.float32)
247
+
248
+ return log_fbank
249
+
250
+ def _compute_audio_embed_size(self, audio_frames):
251
+ integer = audio_frames // self.compression_rate
252
+ remainder = audio_frames % self.compression_rate
253
+
254
+ result = integer if remainder == 0 else integer + 1
255
+
256
+ integer = result // self.qformer_compression_rate
257
+ remainder = result % self.qformer_compression_rate
258
+ result = integer if remainder == 0 else integer + 1 # qformer compression
259
+
260
+ return result
261
+
262
+ class NemotronOmniProcessor(ProcessorMixin):
263
+ attributes = ["image_processor", "feature_extractor", "tokenizer"]
264
+ valid_kwargs = ["chat_template", "image_seq_length"]
265
+ image_processor_class = "AutoImageProcessor"
266
+ feature_extractor_class = "NemotronAudioFeatureExtractor"
267
+ tokenizer_class = "AutoTokenizer"
268
+
269
+ def __init__(
270
+ self,
271
+ image_processor,
272
+ feature_extractor,
273
+ tokenizer,
274
+ chat_template=None,
275
+ image_seq_length: int = 256,
276
+ **kwargs,
277
+ ):
278
+ self.image_seq_length = image_seq_length
279
+ self.image_token_id = -999#tokenizer.image_token_id
280
+ self.boi_token = ''#tokenizer.boi_token
281
+ self.image_token = ''#tokenizer.image_token
282
+ self.eoi_token=''
283
+ image_tokens_expanded = "".join([self.image_token] * image_seq_length)
284
+ self.full_image_sequence = f"\n\n{self.boi_token}{image_tokens_expanded}{self.eoi_token}\n\n"
285
+
286
+ self.audio_token_id = 128255
287
+ self.boa_token = "<start_of_audio>"
288
+ self.eoa_token = "<end_of_audio>"
289
+ self.audio_token = "<audio_soft_token>"
290
+
291
+ super().__init__(
292
+ image_processor=image_processor,
293
+ feature_extractor=feature_extractor,
294
+ tokenizer=tokenizer,
295
+ chat_template=chat_template,
296
+ **kwargs,
297
+ )
298
+
299
+ def __call__(
300
+ self,
301
+ images: ImageInput = None,
302
+ text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
303
+ videos=None,
304
+ audio: List[AudioInput] = None,
305
+ return_tensors: Optional[Union[str, TensorType]] = None,
306
+ ) -> BatchFeature:
307
+ if text is None and images is None:
308
+ raise ValueError("Provide at least one of `text` or `audio`.")
309
+
310
+
311
+ if isinstance(text, str):
312
+ text = [text]
313
+ elif not isinstance(text, list) and not isinstance(text[0], str):
314
+ raise ValueError("Invalid input text. Please provide a string, or a list of strings")
315
+
316
+
317
+ audio_inputs = {}
318
+ if audio is not None:
319
+ full_audio_sequences = []
320
+ audio_inputs = self.feature_extractor(audio)
321
+ for i, embed_size in enumerate(audio_inputs.audio_embed_sizes):
322
+ audio_tokens_expanded = "".join([self.audio_token] * embed_size)
323
+ full_audio_sequence = f"\n\n{self.boa_token}{audio_tokens_expanded}{self.eoa_token}\n\n"
324
+ full_audio_sequences.append(full_audio_sequence)
325
+
326
+ text = [prompt.replace(self.boa_token, audio_sequences) for (prompt, audio_sequences) in zip(text, full_audio_sequences)]
327
+
328
+ text_inputs = self.tokenizer(text=text, return_tensors="np")
329
+
330
+ # Add token type ids manually, as tokenizer can't do arbitrary position token types
331
+ array_ids = text_inputs["input_ids"]
332
+ mm_token_type_ids = np.zeros_like(text_inputs["input_ids"])
333
+ mm_token_type_ids[array_ids == self.image_token_id] = 1
334
+ mm_token_type_ids[array_ids == self.audio_token_id] = 2
335
+
336
+ has_vision_ids = np.any(mm_token_type_ids == 1, axis=1)
337
+ has_audio_ids = np.any(mm_token_type_ids == 2, axis=1)
338
+
339
+ input_modes = (has_audio_ids << 1) | has_vision_ids
340
+
341
+ text_inputs = {k: v.tolist() for k, v in text_inputs.items()} # in case user requested list inputs
342
+ text_inputs["token_type_ids"] = mm_token_type_ids.tolist()
343
+ text_inputs["input_modes"] = input_modes.tolist()
344
+
345
+ return BatchFeature(data={**text_inputs, **audio_inputs}, tensor_type=return_tensors)
346
+
347
+ # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->Gemma
348
+ def batch_decode(self, *args, **kwargs):
349
+ """
350
+ This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
351
+ refer to the docstring of this method for more information.
352
+ """
353
+ return self.tokenizer.batch_decode(*args, **kwargs)
354
+
355
+ # Copied from transformers.models.clip.processing_clip.CLIPProcessor.decode with CLIP->Gemma
356
+ def decode(self, *args, **kwargs):
357
+ """
358
+ This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
359
+ the docstring of this method for more information.
360
+ """
361
+ return self.tokenizer.decode(*args, **kwargs)
362
+
363
+ @property
364
+ def model_input_names(self):
365
+ tokenizer_input_names = self.tokenizer.model_input_names + ["token_type_ids"]
366
+ image_processor_input_names = self.image_processor.model_input_names
367
+ return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
368
+
369
+ AutoFeatureExtractor.register("NemotronAudioFeatureExtractor", NemotronAudioFeatureExtractor)
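
For prompt budgeting it helps to know how many soft tokens a clip turns into: at 16 kHz the extractor above uses a 400-sample window with a 160-sample hop (10 ms), so a clip yields roughly 100 frames per second, and `_compute_audio_embed_size` then applies two ceiling divisions (by `audio_compression_rate` and `audio_downsample_rate`). A small sketch of that budget, assuming the default rates of 8 and 1:

```python
import math

def audio_token_budget(duration_s, compression_rate=8, qformer_compression_rate=1,
                       hop_ms=10, feat_stride=1):
    """Mirrors _compute_audio_embed_size for a clip of `duration_s` seconds."""
    frames = int(duration_s * 1000 // hop_ms) * feat_stride  # ~100 frames per second
    stage1 = math.ceil(frames / compression_rate)
    return math.ceil(stage1 / qformer_compression_rate)

for secs in (1, 5, 30):
    print(secs, "s ->", audio_token_budget(secs), "audio soft tokens")  # 13, 63, 375
```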
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "image_processor_type": "LlamaNemotronNanoVLImageProcessor",
3
+ "auto_map": {
4
+ "AutoImageProcessor": "image_processing.LlamaNemotronNanoVLImageProcessor"
5
+ },
6
+ "audio_compression_rate": 8,
7
+ "audio_downsample_rate": 1,
8
+ "audio_feat_stride": 1,
9
+ "compression_rate": 8,
10
+ "do_convert_rgb": null,
11
+ "do_normalize": true,
12
+ "do_pan_and_scan": null,
13
+ "do_rescale": true,
14
+ "do_resize": true,
15
+ "feat_stride": 1,
16
+ "feature_extractor_type": "NemotronAudioFeatureExtractor",
17
+ "processor_class": "NemotronOmniProcessor",
18
+ "feature_size": 80,
19
+ "padding_side": "right",
20
+ "padding_value": 0.0,
21
+ "pan_and_scan_max_num_crops": null,
22
+ "pan_and_scan_min_crop_size": null,
23
+ "pan_and_scan_min_ratio_to_activate": null,
24
+ "qformer_compression_rate": 1,
25
+ "resample": 2,
26
+ "rescale_factor": 0.00392156862745098,
27
+ "return_attention_mask": true,
28
+ "sampling_rate": 16000
29
+ }
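
These keys map onto the `NemotronAudioFeatureExtractor` constructor in `preprocessing_NemotronOmni.py` (80 log-Mel bins at 16 kHz, compression rate 8). A quick sanity check, assuming a local checkout of this repo so that the module and this JSON file are importable and readable:

```python
import json
import numpy as np
from preprocessing_NemotronOmni import NemotronAudioFeatureExtractor

with open("preprocessor_config.json") as f:
    cfg = json.load(f)

extractor = NemotronAudioFeatureExtractor(
    audio_compression_rate=cfg["audio_compression_rate"],
    audio_downsample_rate=cfg["audio_downsample_rate"],
    audio_feat_stride=cfg["audio_feat_stride"],
    feature_size=cfg["feature_size"],
    sampling_rate=cfg["sampling_rate"],
    padding_value=cfg["padding_value"],
)

# One second of silence at 16 kHz -> (1, 98, 80) log-Mel frames and an embed size of 13.
out = extractor([np.zeros(16000, dtype=np.float32)], sampling_rate=16000)
print(out["input_audio_embeds"].shape, out["audio_embed_sizes"])
```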
processor_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "auto_map": {
3
+ "AutoProcessor": "preprocessing_NemotronOmni.NemotronOmniProcessor"
4
+ },
5
+ "processor_class": "NemotronOmniProcessor"
6
+ }
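
The `auto_map` entry is what lets `AutoProcessor` resolve the custom `NemotronOmniProcessor` defined in `preprocessing_NemotronOmni.py`, so loading it needs `trust_remote_code=True`. A usage sketch; the repo id below is a placeholder for wherever this checkpoint ends up hosted:

```python
from transformers import AutoProcessor

# Placeholder repo id (assumption): point this at the actual Hub repo or a local path.
processor = AutoProcessor.from_pretrained("your-org/NemotronOmni", trust_remote_code=True)
print(type(processor).__name__)  # NemotronOmniProcessor, resolved via the auto_map above
```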
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot_id|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "boa_token": {
17
+ "content": "<start_of_audio>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "eoa_token": {
24
+ "content": "<end_of_audio>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "audio_token": {
31
+ "content": "<audio_soft_token>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
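
These markers are how prompts reference audio: the processor replaces `<start_of_audio>` in the prompt with the expanded run of `<audio_soft_token>`s (bracketed by the start/end tokens) and then derives `input_modes` by bit-packing which modalities actually appear in the ids. The sketch below mirrors that bit-packing with the ids hard-coded in `NemotronOmniProcessor` (-999 for image, 128255 for audio); the sample id sequence itself is made up.

```python
import numpy as np

IMAGE_TOKEN_ID, AUDIO_TOKEN_ID = -999, 128255  # ids hard-coded in NemotronOmniProcessor
array_ids = np.array([[128000, AUDIO_TOKEN_ID, AUDIO_TOKEN_ID, 42, 7]])  # toy prompt ids

# Per-position type: 0 = text, 1 = image, 2 = audio.
mm_token_type_ids = np.zeros_like(array_ids)
mm_token_type_ids[array_ids == IMAGE_TOKEN_ID] = 1
mm_token_type_ids[array_ids == AUDIO_TOKEN_ID] = 2

has_vision = np.any(mm_token_type_ids == 1, axis=1)
has_audio = np.any(mm_token_type_ids == 2, axis=1)
input_modes = (has_audio << 1) | has_vision
print(input_modes)  # [2]: audio only, which the model routes to its speech LoRA adapter
```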
speech_conformer_encoder.py ADDED
The diff for this file is too large to render. See raw diff
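
The raw diff is not rendered here, but the interface the rest of the code relies on is narrow: `ConformerEncoder(**config.audio_config)` consumes the padded `(batch, frames, 80)` log-Mel features plus a frame-level mask and returns a shorter sequence of `attention_dim`-wide vectors (and updated masks), which the two-layer GELU projector lifts into the language model's hidden size. A shape-level sketch of that projector alone, with illustrative dimensions; the real `attention_dim` and hidden size come from the checkpoint's config:

```python
import torch
import torch.nn as nn

attention_dim, hidden_size = 1024, 3072  # illustrative; read from config.audio_config / text_config

audio_projector = nn.Sequential(
    nn.Linear(attention_dim, hidden_size, bias=True),
    nn.GELU(approximate="none"),
    nn.Linear(hidden_size, hidden_size, bias=True),
)

encoder_out = torch.randn(2, 13, attention_dim)  # stand-in for ConformerEncoder output
projected = audio_projector(encoder_out)         # -> (2, 13, 3072), ready for masked_scatter
print(projected.shape)
```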
 
test.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd2ec0ed10737c65e5a71158bdd29bc0f8c391462e7c78562f48c7e173d265d4
3
+ size 17209878
tokenizer_config.json ADDED
@@ -0,0 +1,2063 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|eom_id|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|python_tag|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_10|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<start_of_audio>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<end_of_audio>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<audio_soft_token>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{%- if tools %}{{- '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n' -}}{%- if messages[0].role == 'system' and messages[0].content != '' -%}{{- messages[0].content + '\n\n' -}}{%- else -%}{{- 'detailed thinking off\n\n' -}}{%- endif -%}{{- '<AVAILABLE_TOOLS>[' -}}{%- for tool in tools -%}{{- (tool.function if tool.function is defined else tool) | tojson -}}{{- ', ' if not loop.last else '' -}}{%- endfor -%}{{- ']</AVAILABLE_TOOLS>' -}}{{- '<|eot_id|>' -}}{%- else %}{{- '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n' -}}{%- if messages[0].role == 'system' and messages[0].content != '' -%}{{- messages[0].content -}}{%- else -%}{{- 'detailed thinking off' -}}{%- endif %}{{- '<|eot_id|>' -}}{%- endif %}{%- for message in messages -%}{%- if (message.role == 'user') -%}{{- '<|start_header_id|>user<|end_header_id|>\n\n' + message.content + '<|eot_id|>' -}}{%- elif message.role == 'assistant' -%}{%- set content = message.content -%}{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' + content -}}{%- if message.tool_calls -%}{{- '<TOOLCALL>[' -}}{%- for tool_call in message.tool_calls -%}{%- if tool_call.function -%}{%- set tool_call = tool_call.function -%}{%- endif -%}{{- '{\"name\": \"' }}{{- tool_call.name }}{{- '\", \"arguments\": ' -}}{%- if tool_call.arguments is string -%}{{- tool_call.arguments -}}{%- else -%}{{- tool_call.arguments | tojson -}}{%- endif -%}{{- ', ' if not loop.last else '' -}}{%- endfor -%}{{- ']</TOOLCALL>' -}}{%- endif %}{{- '<|eot_id|>' -}}{%- elif message.role == 'tool' -%}{%- if loop.first or (messages[loop.index0 - 1].role != 'tool') -%}{{- '<|start_header_id|>user<|end_header_id|>\n\n' }}{{- '<TOOL_RESPONSE>[' -}}{%- endif -%}{{- message.content -}}{{- ', ' if not loop.last and (messages[loop.index0 + 1].role == 'tool') else '' -}}{%- if loop.last or (messages[loop.index0 + 1].role != 'tool') -%}{{- ']</TOOL_RESPONSE>' -}}{{- '<|eot_id|>' -}}{%- endif %}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}{%- endif %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
2056
+ "extra_special_tokens": {},
2057
+ "model_input_names": [
2058
+ "input_ids",
2059
+ "attention_mask"
2060
+ ],
2061
+ "model_max_length": 131072,
2062
+ "tokenizer_class": "PreTrainedTokenizerFast"
2063
+ }
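The last three entries added above (<start_of_audio> = 128253, <end_of_audio> = 128254, <audio_soft_token> = 128255) are the audio-specific special tokens; the first two are exactly the embedding rows that the training script below leaves trainable. A minimal sketch for checking that the uploaded tokenizer resolves them as expected (the local path is a placeholder, not part of this commit):

    from transformers import AutoTokenizer

    # placeholder path: point this at a local checkout of this repository
    tok = AutoTokenizer.from_pretrained("./Llama-NemotronOmni")

    for token in ["<start_of_audio>", "<end_of_audio>", "<audio_soft_token>"]:
        # expected IDs per the added_tokens_decoder entries above: 128253, 128254, 128255
        print(token, tok.convert_tokens_to_ids(token))
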
training.py ADDED
@@ -0,0 +1,398 @@
1
+ import datasets
2
+ datasets.config.DOWNLOADED_DATASETS_PATH = "/mnt/jeff/huggingface/data"
3
+ import os
4
+ os.environ['HF_HOME'] = '/mnt/jeff/huggingface'
5
+
6
+ import argparse
7
+ import json
8
+ import os
9
+ from pathlib import Path
10
+
11
+ import numpy as np
12
+ import torch
13
+ import sacrebleu
14
+
15
+ from datasets import load_dataset
16
+ from torch.utils.data import Dataset, ConcatDataset
17
+ from tqdm import tqdm
18
+ from transformers import (
19
+ AutoProcessor,
20
+ AutoModel,
21
+ BatchFeature,
22
+ Trainer,
23
+ TrainingArguments,
24
+ StoppingCriteria,
25
+ StoppingCriteriaList,
26
+ )
27
+ from collections import defaultdict
28
+
29
+ import soundfile as sf
30
+ from datasets import Audio
31
+ import random
32
+ from ASRDataset import *
33
+
34
+
35
+ def count_parameters_by_module(model):
36
+ # dictionary of parameter counts per top-level module
37
+ module_params = defaultdict(lambda: {"total": 0, "trainable": 0})
38
+
39
+ # all params
40
+ total_params = 0
41
+ total_trainable_params = 0
42
+
43
+ # Check Embedding Token masks
44
+ embedding_masks = {}
45
+ for name, param in model.named_parameters():
46
+ if 'embed_tokens.weight' in name and hasattr(param, '_backward_hooks') and param._backward_hooks:
47
+ # check whether this parameter has an embedding_grad_mask_hook registered
48
+ for hook_id, hook_fn in param._backward_hooks.items():
49
+ if hook_fn.__code__.co_name == 'embedding_grad_mask_hook':
50
+ # Accessing mask variables in the closure of hook functions
51
+ for cell in hook_fn.__closure__ or []:
52
+ if isinstance(cell.cell_contents, torch.Tensor) and cell.cell_contents.dtype == torch.bool:
53
+ # check mask tensor
54
+ embedding_masks[name] = ~cell.cell_contents # True : Trainable
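+ # (the hook's closure stores the freeze mask as True = frozen, so its inverse marks the trainable rows)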
55
+
56
+ # Count params by modules
57
+ for name, param in model.named_parameters():
58
+ # extract the top-level module name
59
+ module_name = name.split('.')[0]
60
+ param_count = param.numel()
61
+
62
+ module_params[module_name]["total"] += param_count
63
+ total_params += param_count
64
+
65
+ if param.requires_grad:
66
+ # Only count entries that are genuinely trainable (respecting the embedding mask)
67
+ if name in embedding_masks:
68
+ trainable_count = embedding_masks[name].sum().item()
69
+ module_params[module_name]["trainable"] += trainable_count
70
+ total_trainable_params += trainable_count
71
+ else:
72
+ module_params[module_name]["trainable"] += param_count
73
+ total_trainable_params += param_count
74
+
75
+ print(f"All Params: {total_params:,}")
76
+ print(f"Trainable Params: {total_trainable_params:,} ({total_trainable_params/total_params*100:.2f}%)")
77
+ print("\nParams by Module:")
78
+
79
+ for module_name, counts in sorted(module_params.items()):
80
+ trainable_percentage = counts["trainable"] / counts["total"] * 100 if counts["total"] > 0 else 0
81
+ total_percentage = counts["total"] / total_params * 100
82
+
83
+ print(f"- {module_name}:")
84
+ print(f" Total: {counts['total']:,} ({total_percentage:.2f}% of model)")
85
+ print(f" Trainable: {counts['trainable']:,} ({trainable_percentage:.2f}% of module)")
86
+
87
+ return module_params
88
+
89
+ def create_model(model_name_or_path, revision="main", use_flash_attention = False):
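+ # Summary of the body below: load the base model in bfloat16, freeze every parameter,
+ # then selectively re-enable the audio tower, the audio projector, and two
+ # audio-token embedding rows for training.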
90
+ model = AutoModel.from_pretrained(
91
+ model_name_or_path,
92
+ revision=revision,
93
+ torch_dtype=torch.bfloat16,
94
+ device_map="auto",
95
+ attn_implementation="flash_attention_2" if use_flash_attention else "eager",
96
+ trust_remote_code=True,
97
+ )
98
+
99
+ # Set use_cache to False after the model is loaded
100
+ model.config.use_cache = False
101
+
102
+ # Freeze all parameters
103
+ for param in model.parameters():
104
+ param.requires_grad = False
105
+
106
+ model.set_lora_adapter('speech')
107
+ model.to(torch.bfloat16)
108
+
109
+ # (Optional) unfreeze audio_tower parameters
110
+ for param in model.audio_tower.parameters():
111
+ param.requires_grad = True
112
+
113
+ # Unfreeze audio_projector parameters
114
+ for param in model.audio_projector.parameters():
115
+ param.requires_grad = True
116
+
117
+ # (Optional) unfreeze audio embed_tokens
118
+ train_embed = True
119
+ if train_embed:
120
+ embed_tokens = model.language_model.model.model.embed_tokens
121
+
122
+ embed_tokens.weight.requires_grad = False
123
+
124
+ # Added speech token IDs (only these tokens' embeddings remain trainable)
125
+ trainable_token_ids = [128253, 128254]
126
+
127
+ embed_tokens.weight.requires_grad = True
128
+ mask = torch.ones_like(embed_tokens.weight, dtype=torch.bool)
129
+ mask[trainable_token_ids] = False # trainable token rows are False (gradients kept), all others True (gradients zeroed)
130
+
131
+ # backward hook, with gradient masking
132
+ def embedding_grad_mask_hook(grad):
133
+ return grad.masked_fill(mask, 0)
134
+
135
+ embed_tokens.weight.register_hook(embedding_grad_mask_hook)
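+ # net effect: the full embedding matrix still requires_grad, but the hook zeroes the
+ # gradient for every row except IDs 128253/128254 (<start_of_audio>, <end_of_audio>),
+ # so only those two embeddings are actually updated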
136
+
137
+ model.language_model.model.model.embed_tokens = embed_tokens
138
+
139
+ count_parameters_by_module(model)
140
+
141
+ return model
142
+
143
+ model_name_or_path = '/mnt/jeff/InCar/LlamaNemotronOmni/Llama-NemotronOmni'
144
+ use_flash_attention = False
145
+
146
+ output_dir = '../nemotron_tmp3'
147
+ batch_size = 128
148
+ batch_size_per_gpu = 4
149
+ learning_rate = 4.0e-5 # 1.0e-4 for fine-tuning
150
+ wd = 0.01
151
+ num_train_epochs = 10
152
+
153
+ revision = "main" #"v1.0"
154
+
155
+ processor = AutoProcessor.from_pretrained(
156
+ model_name_or_path,
157
+ revision=revision,
158
+ trust_remote_code=True,
159
+ )
160
+
161
+ model = create_model(
162
+ model_name_or_path,
163
+ revision=revision,
164
+ use_flash_attention=use_flash_attention,
165
+ )
166
+
167
+ train_datasets = []
168
+
169
+ custom_yating_tw_loc = TWCostumData(processor=processor,
170
+ csv_path='/mnt/jeff/InCar/data/tw_data/taiwan_location-srdc_tts-20250505-yating-1-2s-breezyvoice.csv')
171
+ train_datasets.append(custom_yating_tw_loc) # 9458
172
+
173
+ custom_tw_loc = TWCostumData(processor=processor,
174
+ csv_path='/mnt/jeff/InCar/data/tw_data/taiwan_location-srdc_tts-20250509-common_voice_16_1-TW.csv')
175
+ train_datasets.append(custom_tw_loc) # 1500
176
+
177
+ custom_tw_loc2 = TWCostumData(processor=processor,
178
+ csv_path='/mnt/jeff/InCar/data/tw_data/taiwan_location-srdc_tts-20250529-common_voice_16_1-TW.csv')
179
+ train_datasets.append(custom_tw_loc2) # 9458
180
+
181
+ # custom_yating_tw_road = TWCostumData(processor=processor,
182
+ # csv_path='/mnt/jeff/InCar/data/tw_data/taiwan_road-srdc_tts-20250430-yating-1-2s-breezyvoice.csv')
183
+ # train_datasets.append(custom_yating_tw_road) # 35224
184
+
185
+ custom_tw_road = TWCostumData(processor=processor,
186
+ csv_path='/mnt/jeff/InCar/data/tw_data/taiwan_road-srdc_tts-20250509-common_voice_16_1-TW.csv')
187
+ train_datasets.append(custom_tw_road) # 1500
188
+
189
+ custom_tw_road2 = TWCostumData(processor=processor,
190
+ csv_path='/mnt/jeff/InCar/data/tw_data/taiwan_road-srdc_tts-20250529-common_voice_16_1-TW.csv')
191
+ train_datasets.append(custom_tw_road2) # 35224
192
+
193
+
194
+
195
+ # common voice asr
196
+ commonvoice_speech_tw2 = CommonVoiceDataset( # 45689
197
+ processor=processor,
198
+ source_lang="zh-TW",
199
+ split="other"
200
+ )
201
+ train_datasets.append(commonvoice_speech_tw2)
202
+
203
+ commonvoice_speech_cn = CommonVoiceDataset( #28868
204
+ processor=processor,
205
+ source_lang="zh-CN",
206
+ split="train[:10%]"
207
+ )
208
+ train_datasets.append(commonvoice_speech_cn)
209
+
210
+
211
+ commonvoice_speech_tw = CommonVoiceDataset( #6812
212
+ processor=processor,
213
+ source_lang="zh-TW",
214
+ split="train"
215
+ )
216
+ train_datasets.append(commonvoice_speech_tw)
217
+
218
+
219
+
220
+
221
+ # Libri Speech Clean ASR mode (English -> English text)
222
+ libri_speech_clean = LibriSpeechDataset( #103781
223
+ processor=processor,
224
+ subset="clean",
225
+ split="train.360"
226
+ )
227
+ train_datasets.append(libri_speech_clean)
228
+
229
+
230
+ # Fleurs ASR mode (English -> English text)
231
+ en_asr_fleurs = FleursDataset( #2550
232
+ processor=processor,
233
+ split="train",
234
+ source_lang="en_us", # English
235
+ mode="asr"
236
+ )
237
+ train_datasets.append(en_asr_fleurs)
238
+
239
+ ch_asr_fleurs = FleursDataset( #3152
240
+ processor=processor,
241
+ split="train",
242
+ source_lang="cmn_hans_cn",
243
+ mode="asr"
244
+ )
245
+ train_datasets.append(ch_asr_fleurs)
246
+
247
+
248
+
249
+
250
+
251
+ # en_ch_ast_fleurs = FleursDataset(
252
+ # processor=processor,
253
+ # split="train",
254
+ # source_lang="en_us",
255
+ # target_lang="cmn_hans_cn",
256
+ # mode="ast"
257
+ # )
258
+ # train_datasets.append(en_ch_ast_fleurs)
259
+
260
+ # ch_en_ast_fleurs = FleursDataset(
261
+ # processor=processor,
262
+ # split="train",
263
+ # source_lang="cmn_hans_cn",
264
+ # target_lang="en_us",
265
+ # mode="ast"
266
+ # )
267
+ # train_datasets.append(ch_en_ast_fleurs)
268
+
269
+ print("Number of datasets:", len(train_datasets))
270
+ print([len(dataset) for dataset in train_datasets])
271
+
272
+ # ConcatDataset
273
+ train_dataset = ConcatDataset(train_datasets) if len(train_datasets) > 1 else train_datasets[0]
274
+ print("Total number of training samples:", len(train_dataset))
275
+
276
+
277
+
278
+ # Check GPUs
279
+ num_gpus = torch.cuda.device_count()
280
+ print(f'training on {num_gpus} GPUs')
281
+
282
+ assert (
283
+ batch_size % (num_gpus * batch_size_per_gpu) == 0
284
+ ), 'Batch size must be divisible by num_gpus * batch_size_per_gpu'
285
+ gradient_accumulation_steps = batch_size // (num_gpus * batch_size_per_gpu)
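+ # e.g. with batch_size=128 and batch_size_per_gpu=4 as set above, this is
+ # 128 // (4 * num_gpus): 8 accumulation steps on 4 GPUs, 4 on 8 GPUs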
286
+
287
+ # hard-coded DeepSpeed config (kept for reference; not passed to the Trainer, which sets deepspeed=None)
288
+ dp_config = {
289
+ "fp16": {
290
+ "enabled": "auto",
291
+ "loss_scale": 0,
292
+ "loss_scale_window": 1000,
293
+ "initial_scale_power": 16,
294
+ "hysteresis": 2,
295
+ "min_loss_scale": 1
296
+ },
297
+ "zero_optimization": {
298
+ "stage": 2,
299
+ "allgather_partitions": True,
300
+ "allgather_bucket_size": 5e8,
301
+ "overlap_comm": False,
302
+ "reduce_scatter": True,
303
+ "reduce_bucket_size": 5e8,
304
+ "contiguous_gradients": True,
305
+ "cpu_offload": True
306
+ },
307
+
308
+ "train_batch_size": "auto",
309
+ "gradient_accumulation_steps": "auto",
310
+ "optimizer": {
311
+ "type": "AdamW",
312
+ "params": {
313
+ "lr": "auto",
314
+ "betas": 'auto',
315
+ "eps": 'auto',
316
+ "weight_decay": "auto"
317
+ }
318
+ },
319
+ "scheduler": {
320
+ "type": "WarmupDecayLR",
321
+ "params": {
322
+ "warmup_min_lr": "auto",
323
+ "warmup_max_lr": "auto",
324
+ "warmup_num_steps": "auto",
325
+ "total_num_steps": "auto"
326
+ }
327
+ },
328
+ "gradient_clipping": 1.0,
329
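+ # NOTE: this second "zero_optimization" key silently overrides the stage-2 block above,
+ # since duplicate keys in a Python dict literal keep only the last value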
+ "zero_optimization": {
330
+ "stage": 0
331
+ }
332
+ }
333
+ training_args = TrainingArguments(
334
+ num_train_epochs=num_train_epochs,
335
+ per_device_train_batch_size=batch_size_per_gpu,
336
+ gradient_checkpointing=False,
337
+ gradient_checkpointing_kwargs={'use_reentrant': False},
338
+ gradient_accumulation_steps=gradient_accumulation_steps,
339
+ optim='adamw_torch',
340
+ adam_beta1=0.9,
341
+ adam_beta2=0.95,
342
+ adam_epsilon=1e-7,
343
+ learning_rate=learning_rate,
344
+ weight_decay=wd,
345
+ max_grad_norm=1.0,
346
+ lr_scheduler_type='cosine',
347
+ warmup_steps=50,
348
+ logging_steps=10,
349
+ output_dir=output_dir,
350
+ save_total_limit=10,
351
+ save_only_model=True,
352
+ bf16=True,
353
+ fp16=False,
354
+ remove_unused_columns=False,
355
+ report_to='none',
356
+ deepspeed=None,
357
+ disable_tqdm=False,
358
+ dataloader_num_workers=16,
359
+ save_strategy='epoch',
360
+ # save_steps=2500,
361
+ ddp_find_unused_parameters=True,
362
+
363
+ )
364
+
365
+ out_path = Path(training_args.output_dir)
366
+ out_path.mkdir(parents=True, exist_ok=True)
367
+
368
+ # create optimizer only for trainable params
369
+ optimizer = torch.optim.AdamW(
370
+ filter(lambda p: p.requires_grad, model.parameters()),
371
+ lr=learning_rate,
372
+ weight_decay=wd,
373
+ betas=(0.9, 0.95),
374
+ eps=1e-7,
375
+ )
376
+
377
+ # Trainer Setting
378
+ trainer = Trainer(
379
+ model=model,
380
+ args=training_args,
381
+ data_collator=covost_collate_fn,
382
+ train_dataset=train_dataset,
383
+ optimizers=(optimizer, None)
384
+ )
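+ # passing optimizers=(optimizer, None) makes the Trainer use this optimizer as-is and
+ # build the LR scheduler itself from the lr_scheduler_type / warmup_steps set above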
385
+
386
+ trainer.train()
387
+
388
+
389
+ # 1. Save LoRA adapter
390
+ model.language_model.model.save_pretrained(output_dir)
391
+
392
+ # # 1-1. Delete Markdown file
393
+ # markdown_file = os.path.join(output_dir, "README.md")
394
+ # if os.path.exists(markdown_file):
395
+ # os.remove(markdown_file)
396
+
397
+ # 2. Save entire model
398
+ model.save_pretrained(output_dir)