diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ba6c5f1017559e9795904b8398064ca814e16e72 --- /dev/null +++ b/README.md @@ -0,0 +1,126 @@ +--- +base_model: PaddlePaddle/PaddleOCR-VL +library_name: peft +license: apache-2.0 +pipeline_tag: image-text-to-text +language: +- pl +tags: +- ocr +- lora +- transformers +- polish +- document-ai +- vision-language +datasets: +- synthetic-polish-ocr +--- + +# RysOCR - Polish OCR LoRA for PaddleOCR-VL + +A LoRA adapter fine-tuned on PaddleOCR-VL specifically for **Polish text recognition**, with emphasis on correct handling of Polish diacritics (ą, ć, ę, ł, ń, ó, ś, ź, ż). + +## Motivation + +Polish is underrepresented in OCR training data. Most vision-language OCR models struggle with Polish diacritics, often substituting: +- `ą` → `a` +- `ę` → `e` +- `ł` → `l` or `t` +- `ó` → `o` +- the remaining diacritics (`ć`, `ń`, `ś`, `ź`, `ż`) are similarly replaced by look-alike characters + +This model addresses that gap by fine-tuning on synthetic Polish document images covering addresses, invoices, receipts, names, and common phrases.
+ +## Model Details + +| Property | Value | +|----------|-------| +| Base Model | [PaddlePaddle/PaddleOCR-VL](https://huggingface.co/PaddlePaddle/PaddleOCR-VL) | +| Method | LoRA (Low-Rank Adaptation) | +| LoRA Rank | 16 | +| LoRA Alpha | 32 | +| Target Modules | q_proj, k_proj, v_proj, o_proj | +| Training Framework | PEFT 0.18.0 + Transformers | + +## Usage + +```python +from transformers import AutoModelForCausalLM, AutoProcessor +from peft import PeftModel +from PIL import Image + +# Load base model +base_model = AutoModelForCausalLM.from_pretrained( + "PaddlePaddle/PaddleOCR-VL", + trust_remote_code=True, + torch_dtype="auto", + device_map="auto" +) + +# Load LoRA adapter +model = PeftModel.from_pretrained(base_model, "anon13370/RysOCR") + +processor = AutoProcessor.from_pretrained( + "anon13370/RysOCR", + trust_remote_code=True +) + +# Run inference +image = Image.open("your_document.png") +prompt = "OCR: " + +inputs = processor(images=image, text=prompt, return_tensors="pt") +inputs = {k: v.to(model.device) for k, v in inputs.items()} + +outputs = model.generate(**inputs, max_new_tokens=256) +text = processor.decode(outputs[0], skip_special_tokens=True) +print(text) +``` + +## Training Details + +- **Training Data**: 10,000 synthetic Polish document images +- **Categories**: Addresses, invoice lines, receipt lines, dates, names, prices, phrases +- **Hardware**: Trained with LoRA to enable fine-tuning on consumer hardware (4-6GB VRAM) +- **Epochs**: 1 epoch over full dataset +- **Optimizer**: AdamW with linear learning rate schedule + +## Baseline Performance (Pre-Fine-Tuning) + +Baseline PaddleOCR-VL performance on Polish test set: + +| Metric | Value | +|--------|-------| +| Character Error Rate (CER) | 5.58% | +| Word Error Rate (WER) | 13.37% | +| Exact Match | 74.00% | +| Diacritic Accuracy | 74.14% | + +Key diacritic confusions in baseline: +- `ł` frequently confused with `l` or `t` +- `ę` sometimes rendered as `e` +- `ś` confused with `š` + +## 
Limitations + +- Optimized for printed Polish text; handwritten recognition may vary +- Best results on clean document scans; heavily degraded images may still have errors +- Inference requires loading both base model and LoRA weights + +## License + +Apache 2.0 (same as base model) + +## Citation + +If you use this model, please cite: + +```bibtex +@misc{rysocr2024, + title={RysOCR: Polish OCR LoRA for PaddleOCR-VL}, + author={anon13370}, + year={2024}, + publisher={Hugging Face}, + url={https://huggingface.co/anon13370/RysOCR} +} +``` diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37155570d2b4ded594fb262b1747669fed865f87 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "PaddlePaddle/PaddleOCR-VL", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efcb2b31f4f55511067774f0f0b9ca3c9fd6d88a --- /dev/null +++ 
b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fce6e77d1626eb47534f1ca676573f13c1a976460539eeb104c3f3b56bbae7b +size 22015368 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..6a2790e1462ecb007f9b92dfb00f594701462889 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,1021 @@ +{ + "<ecel>": 101308, + "<fcel>": 101309, + "<lcel>": 101311, + "<nl>": 101313, + "<ucel>": 101312, + "<xcel>": 101310, + "<|AUDIO_PLACEHOLDER|>": 100296, + "<|CROP_COL_SEP|>": 101301, + "<|CROP_ROW_SEP|>": 101302, + "<|IMAGE_END|>": 101306, + "<|IMAGE_PLACEHOLDER|>": 100295, + "<|IMAGE_SEP|>": 101303, + "<|IMAGE_START|>": 101305, + "<|LOC_0|>": 100297, + "<|LOC_1000|>": 101297, + "<|LOC_100|>": 100397, + "<|LOC_101|>": 100398, + "<|LOC_102|>": 100399, + "<|LOC_103|>": 100400, + "<|LOC_104|>": 100401, + "<|LOC_105|>": 100402, + "<|LOC_106|>": 100403, + "<|LOC_107|>": 100404, + "<|LOC_108|>": 100405, + "<|LOC_109|>": 100406, + "<|LOC_10|>": 100307, + "<|LOC_110|>": 100407, + "<|LOC_111|>": 100408, + "<|LOC_112|>": 100409, + "<|LOC_113|>": 100410, + "<|LOC_114|>": 100411, + "<|LOC_115|>": 100412, + "<|LOC_116|>": 100413, + "<|LOC_117|>": 100414, + "<|LOC_118|>": 100415, + "<|LOC_119|>": 100416, + "<|LOC_11|>": 100308, + "<|LOC_120|>": 100417, + "<|LOC_121|>": 100418, + "<|LOC_122|>": 100419, + "<|LOC_123|>": 100420, + "<|LOC_124|>": 100421, + "<|LOC_125|>": 100422, + "<|LOC_126|>": 100423, + "<|LOC_127|>": 100424, + "<|LOC_128|>": 100425, + "<|LOC_129|>": 100426, + "<|LOC_12|>": 100309, + "<|LOC_130|>": 100427, + "<|LOC_131|>": 100428, + "<|LOC_132|>": 100429, + "<|LOC_133|>": 100430, + "<|LOC_134|>": 100431, + "<|LOC_135|>": 100432, + "<|LOC_136|>": 100433, + "<|LOC_137|>": 100434, + "<|LOC_138|>": 100435, + "<|LOC_139|>": 100436, + "<|LOC_13|>": 100310, + "<|LOC_140|>": 100437, + "<|LOC_141|>": 100438, + "<|LOC_142|>": 100439, + "<|LOC_143|>": 100440, + "<|LOC_144|>": 100441, + "<|LOC_145|>": 100442, +
"<|LOC_146|>": 100443, + "<|LOC_147|>": 100444, + "<|LOC_148|>": 100445, + "<|LOC_149|>": 100446, + "<|LOC_14|>": 100311, + "<|LOC_150|>": 100447, + "<|LOC_151|>": 100448, + "<|LOC_152|>": 100449, + "<|LOC_153|>": 100450, + "<|LOC_154|>": 100451, + "<|LOC_155|>": 100452, + "<|LOC_156|>": 100453, + "<|LOC_157|>": 100454, + "<|LOC_158|>": 100455, + "<|LOC_159|>": 100456, + "<|LOC_15|>": 100312, + "<|LOC_160|>": 100457, + "<|LOC_161|>": 100458, + "<|LOC_162|>": 100459, + "<|LOC_163|>": 100460, + "<|LOC_164|>": 100461, + "<|LOC_165|>": 100462, + "<|LOC_166|>": 100463, + "<|LOC_167|>": 100464, + "<|LOC_168|>": 100465, + "<|LOC_169|>": 100466, + "<|LOC_16|>": 100313, + "<|LOC_170|>": 100467, + "<|LOC_171|>": 100468, + "<|LOC_172|>": 100469, + "<|LOC_173|>": 100470, + "<|LOC_174|>": 100471, + "<|LOC_175|>": 100472, + "<|LOC_176|>": 100473, + "<|LOC_177|>": 100474, + "<|LOC_178|>": 100475, + "<|LOC_179|>": 100476, + "<|LOC_17|>": 100314, + "<|LOC_180|>": 100477, + "<|LOC_181|>": 100478, + "<|LOC_182|>": 100479, + "<|LOC_183|>": 100480, + "<|LOC_184|>": 100481, + "<|LOC_185|>": 100482, + "<|LOC_186|>": 100483, + "<|LOC_187|>": 100484, + "<|LOC_188|>": 100485, + "<|LOC_189|>": 100486, + "<|LOC_18|>": 100315, + "<|LOC_190|>": 100487, + "<|LOC_191|>": 100488, + "<|LOC_192|>": 100489, + "<|LOC_193|>": 100490, + "<|LOC_194|>": 100491, + "<|LOC_195|>": 100492, + "<|LOC_196|>": 100493, + "<|LOC_197|>": 100494, + "<|LOC_198|>": 100495, + "<|LOC_199|>": 100496, + "<|LOC_19|>": 100316, + "<|LOC_1|>": 100298, + "<|LOC_200|>": 100497, + "<|LOC_201|>": 100498, + "<|LOC_202|>": 100499, + "<|LOC_203|>": 100500, + "<|LOC_204|>": 100501, + "<|LOC_205|>": 100502, + "<|LOC_206|>": 100503, + "<|LOC_207|>": 100504, + "<|LOC_208|>": 100505, + "<|LOC_209|>": 100506, + "<|LOC_20|>": 100317, + "<|LOC_210|>": 100507, + "<|LOC_211|>": 100508, + "<|LOC_212|>": 100509, + "<|LOC_213|>": 100510, + "<|LOC_214|>": 100511, + "<|LOC_215|>": 100512, + "<|LOC_216|>": 100513, + "<|LOC_217|>": 100514, + 
"<|LOC_218|>": 100515, + "<|LOC_219|>": 100516, + "<|LOC_21|>": 100318, + "<|LOC_220|>": 100517, + "<|LOC_221|>": 100518, + "<|LOC_222|>": 100519, + "<|LOC_223|>": 100520, + "<|LOC_224|>": 100521, + "<|LOC_225|>": 100522, + "<|LOC_226|>": 100523, + "<|LOC_227|>": 100524, + "<|LOC_228|>": 100525, + "<|LOC_229|>": 100526, + "<|LOC_22|>": 100319, + "<|LOC_230|>": 100527, + "<|LOC_231|>": 100528, + "<|LOC_232|>": 100529, + "<|LOC_233|>": 100530, + "<|LOC_234|>": 100531, + "<|LOC_235|>": 100532, + "<|LOC_236|>": 100533, + "<|LOC_237|>": 100534, + "<|LOC_238|>": 100535, + "<|LOC_239|>": 100536, + "<|LOC_23|>": 100320, + "<|LOC_240|>": 100537, + "<|LOC_241|>": 100538, + "<|LOC_242|>": 100539, + "<|LOC_243|>": 100540, + "<|LOC_244|>": 100541, + "<|LOC_245|>": 100542, + "<|LOC_246|>": 100543, + "<|LOC_247|>": 100544, + "<|LOC_248|>": 100545, + "<|LOC_249|>": 100546, + "<|LOC_24|>": 100321, + "<|LOC_250|>": 100547, + "<|LOC_251|>": 100548, + "<|LOC_252|>": 100549, + "<|LOC_253|>": 100550, + "<|LOC_254|>": 100551, + "<|LOC_255|>": 100552, + "<|LOC_256|>": 100553, + "<|LOC_257|>": 100554, + "<|LOC_258|>": 100555, + "<|LOC_259|>": 100556, + "<|LOC_25|>": 100322, + "<|LOC_260|>": 100557, + "<|LOC_261|>": 100558, + "<|LOC_262|>": 100559, + "<|LOC_263|>": 100560, + "<|LOC_264|>": 100561, + "<|LOC_265|>": 100562, + "<|LOC_266|>": 100563, + "<|LOC_267|>": 100564, + "<|LOC_268|>": 100565, + "<|LOC_269|>": 100566, + "<|LOC_26|>": 100323, + "<|LOC_270|>": 100567, + "<|LOC_271|>": 100568, + "<|LOC_272|>": 100569, + "<|LOC_273|>": 100570, + "<|LOC_274|>": 100571, + "<|LOC_275|>": 100572, + "<|LOC_276|>": 100573, + "<|LOC_277|>": 100574, + "<|LOC_278|>": 100575, + "<|LOC_279|>": 100576, + "<|LOC_27|>": 100324, + "<|LOC_280|>": 100577, + "<|LOC_281|>": 100578, + "<|LOC_282|>": 100579, + "<|LOC_283|>": 100580, + "<|LOC_284|>": 100581, + "<|LOC_285|>": 100582, + "<|LOC_286|>": 100583, + "<|LOC_287|>": 100584, + "<|LOC_288|>": 100585, + "<|LOC_289|>": 100586, + "<|LOC_28|>": 100325, + 
"<|LOC_290|>": 100587, + "<|LOC_291|>": 100588, + "<|LOC_292|>": 100589, + "<|LOC_293|>": 100590, + "<|LOC_294|>": 100591, + "<|LOC_295|>": 100592, + "<|LOC_296|>": 100593, + "<|LOC_297|>": 100594, + "<|LOC_298|>": 100595, + "<|LOC_299|>": 100596, + "<|LOC_29|>": 100326, + "<|LOC_2|>": 100299, + "<|LOC_300|>": 100597, + "<|LOC_301|>": 100598, + "<|LOC_302|>": 100599, + "<|LOC_303|>": 100600, + "<|LOC_304|>": 100601, + "<|LOC_305|>": 100602, + "<|LOC_306|>": 100603, + "<|LOC_307|>": 100604, + "<|LOC_308|>": 100605, + "<|LOC_309|>": 100606, + "<|LOC_30|>": 100327, + "<|LOC_310|>": 100607, + "<|LOC_311|>": 100608, + "<|LOC_312|>": 100609, + "<|LOC_313|>": 100610, + "<|LOC_314|>": 100611, + "<|LOC_315|>": 100612, + "<|LOC_316|>": 100613, + "<|LOC_317|>": 100614, + "<|LOC_318|>": 100615, + "<|LOC_319|>": 100616, + "<|LOC_31|>": 100328, + "<|LOC_320|>": 100617, + "<|LOC_321|>": 100618, + "<|LOC_322|>": 100619, + "<|LOC_323|>": 100620, + "<|LOC_324|>": 100621, + "<|LOC_325|>": 100622, + "<|LOC_326|>": 100623, + "<|LOC_327|>": 100624, + "<|LOC_328|>": 100625, + "<|LOC_329|>": 100626, + "<|LOC_32|>": 100329, + "<|LOC_330|>": 100627, + "<|LOC_331|>": 100628, + "<|LOC_332|>": 100629, + "<|LOC_333|>": 100630, + "<|LOC_334|>": 100631, + "<|LOC_335|>": 100632, + "<|LOC_336|>": 100633, + "<|LOC_337|>": 100634, + "<|LOC_338|>": 100635, + "<|LOC_339|>": 100636, + "<|LOC_33|>": 100330, + "<|LOC_340|>": 100637, + "<|LOC_341|>": 100638, + "<|LOC_342|>": 100639, + "<|LOC_343|>": 100640, + "<|LOC_344|>": 100641, + "<|LOC_345|>": 100642, + "<|LOC_346|>": 100643, + "<|LOC_347|>": 100644, + "<|LOC_348|>": 100645, + "<|LOC_349|>": 100646, + "<|LOC_34|>": 100331, + "<|LOC_350|>": 100647, + "<|LOC_351|>": 100648, + "<|LOC_352|>": 100649, + "<|LOC_353|>": 100650, + "<|LOC_354|>": 100651, + "<|LOC_355|>": 100652, + "<|LOC_356|>": 100653, + "<|LOC_357|>": 100654, + "<|LOC_358|>": 100655, + "<|LOC_359|>": 100656, + "<|LOC_35|>": 100332, + "<|LOC_360|>": 100657, + "<|LOC_361|>": 100658, + 
"<|LOC_362|>": 100659, + "<|LOC_363|>": 100660, + "<|LOC_364|>": 100661, + "<|LOC_365|>": 100662, + "<|LOC_366|>": 100663, + "<|LOC_367|>": 100664, + "<|LOC_368|>": 100665, + "<|LOC_369|>": 100666, + "<|LOC_36|>": 100333, + "<|LOC_370|>": 100667, + "<|LOC_371|>": 100668, + "<|LOC_372|>": 100669, + "<|LOC_373|>": 100670, + "<|LOC_374|>": 100671, + "<|LOC_375|>": 100672, + "<|LOC_376|>": 100673, + "<|LOC_377|>": 100674, + "<|LOC_378|>": 100675, + "<|LOC_379|>": 100676, + "<|LOC_37|>": 100334, + "<|LOC_380|>": 100677, + "<|LOC_381|>": 100678, + "<|LOC_382|>": 100679, + "<|LOC_383|>": 100680, + "<|LOC_384|>": 100681, + "<|LOC_385|>": 100682, + "<|LOC_386|>": 100683, + "<|LOC_387|>": 100684, + "<|LOC_388|>": 100685, + "<|LOC_389|>": 100686, + "<|LOC_38|>": 100335, + "<|LOC_390|>": 100687, + "<|LOC_391|>": 100688, + "<|LOC_392|>": 100689, + "<|LOC_393|>": 100690, + "<|LOC_394|>": 100691, + "<|LOC_395|>": 100692, + "<|LOC_396|>": 100693, + "<|LOC_397|>": 100694, + "<|LOC_398|>": 100695, + "<|LOC_399|>": 100696, + "<|LOC_39|>": 100336, + "<|LOC_3|>": 100300, + "<|LOC_400|>": 100697, + "<|LOC_401|>": 100698, + "<|LOC_402|>": 100699, + "<|LOC_403|>": 100700, + "<|LOC_404|>": 100701, + "<|LOC_405|>": 100702, + "<|LOC_406|>": 100703, + "<|LOC_407|>": 100704, + "<|LOC_408|>": 100705, + "<|LOC_409|>": 100706, + "<|LOC_40|>": 100337, + "<|LOC_410|>": 100707, + "<|LOC_411|>": 100708, + "<|LOC_412|>": 100709, + "<|LOC_413|>": 100710, + "<|LOC_414|>": 100711, + "<|LOC_415|>": 100712, + "<|LOC_416|>": 100713, + "<|LOC_417|>": 100714, + "<|LOC_418|>": 100715, + "<|LOC_419|>": 100716, + "<|LOC_41|>": 100338, + "<|LOC_420|>": 100717, + "<|LOC_421|>": 100718, + "<|LOC_422|>": 100719, + "<|LOC_423|>": 100720, + "<|LOC_424|>": 100721, + "<|LOC_425|>": 100722, + "<|LOC_426|>": 100723, + "<|LOC_427|>": 100724, + "<|LOC_428|>": 100725, + "<|LOC_429|>": 100726, + "<|LOC_42|>": 100339, + "<|LOC_430|>": 100727, + "<|LOC_431|>": 100728, + "<|LOC_432|>": 100729, + "<|LOC_433|>": 100730, + 
"<|LOC_434|>": 100731, + "<|LOC_435|>": 100732, + "<|LOC_436|>": 100733, + "<|LOC_437|>": 100734, + "<|LOC_438|>": 100735, + "<|LOC_439|>": 100736, + "<|LOC_43|>": 100340, + "<|LOC_440|>": 100737, + "<|LOC_441|>": 100738, + "<|LOC_442|>": 100739, + "<|LOC_443|>": 100740, + "<|LOC_444|>": 100741, + "<|LOC_445|>": 100742, + "<|LOC_446|>": 100743, + "<|LOC_447|>": 100744, + "<|LOC_448|>": 100745, + "<|LOC_449|>": 100746, + "<|LOC_44|>": 100341, + "<|LOC_450|>": 100747, + "<|LOC_451|>": 100748, + "<|LOC_452|>": 100749, + "<|LOC_453|>": 100750, + "<|LOC_454|>": 100751, + "<|LOC_455|>": 100752, + "<|LOC_456|>": 100753, + "<|LOC_457|>": 100754, + "<|LOC_458|>": 100755, + "<|LOC_459|>": 100756, + "<|LOC_45|>": 100342, + "<|LOC_460|>": 100757, + "<|LOC_461|>": 100758, + "<|LOC_462|>": 100759, + "<|LOC_463|>": 100760, + "<|LOC_464|>": 100761, + "<|LOC_465|>": 100762, + "<|LOC_466|>": 100763, + "<|LOC_467|>": 100764, + "<|LOC_468|>": 100765, + "<|LOC_469|>": 100766, + "<|LOC_46|>": 100343, + "<|LOC_470|>": 100767, + "<|LOC_471|>": 100768, + "<|LOC_472|>": 100769, + "<|LOC_473|>": 100770, + "<|LOC_474|>": 100771, + "<|LOC_475|>": 100772, + "<|LOC_476|>": 100773, + "<|LOC_477|>": 100774, + "<|LOC_478|>": 100775, + "<|LOC_479|>": 100776, + "<|LOC_47|>": 100344, + "<|LOC_480|>": 100777, + "<|LOC_481|>": 100778, + "<|LOC_482|>": 100779, + "<|LOC_483|>": 100780, + "<|LOC_484|>": 100781, + "<|LOC_485|>": 100782, + "<|LOC_486|>": 100783, + "<|LOC_487|>": 100784, + "<|LOC_488|>": 100785, + "<|LOC_489|>": 100786, + "<|LOC_48|>": 100345, + "<|LOC_490|>": 100787, + "<|LOC_491|>": 100788, + "<|LOC_492|>": 100789, + "<|LOC_493|>": 100790, + "<|LOC_494|>": 100791, + "<|LOC_495|>": 100792, + "<|LOC_496|>": 100793, + "<|LOC_497|>": 100794, + "<|LOC_498|>": 100795, + "<|LOC_499|>": 100796, + "<|LOC_49|>": 100346, + "<|LOC_4|>": 100301, + "<|LOC_500|>": 100797, + "<|LOC_501|>": 100798, + "<|LOC_502|>": 100799, + "<|LOC_503|>": 100800, + "<|LOC_504|>": 100801, + "<|LOC_505|>": 100802, + 
"<|LOC_506|>": 100803, + "<|LOC_507|>": 100804, + "<|LOC_508|>": 100805, + "<|LOC_509|>": 100806, + "<|LOC_50|>": 100347, + "<|LOC_510|>": 100807, + "<|LOC_511|>": 100808, + "<|LOC_512|>": 100809, + "<|LOC_513|>": 100810, + "<|LOC_514|>": 100811, + "<|LOC_515|>": 100812, + "<|LOC_516|>": 100813, + "<|LOC_517|>": 100814, + "<|LOC_518|>": 100815, + "<|LOC_519|>": 100816, + "<|LOC_51|>": 100348, + "<|LOC_520|>": 100817, + "<|LOC_521|>": 100818, + "<|LOC_522|>": 100819, + "<|LOC_523|>": 100820, + "<|LOC_524|>": 100821, + "<|LOC_525|>": 100822, + "<|LOC_526|>": 100823, + "<|LOC_527|>": 100824, + "<|LOC_528|>": 100825, + "<|LOC_529|>": 100826, + "<|LOC_52|>": 100349, + "<|LOC_530|>": 100827, + "<|LOC_531|>": 100828, + "<|LOC_532|>": 100829, + "<|LOC_533|>": 100830, + "<|LOC_534|>": 100831, + "<|LOC_535|>": 100832, + "<|LOC_536|>": 100833, + "<|LOC_537|>": 100834, + "<|LOC_538|>": 100835, + "<|LOC_539|>": 100836, + "<|LOC_53|>": 100350, + "<|LOC_540|>": 100837, + "<|LOC_541|>": 100838, + "<|LOC_542|>": 100839, + "<|LOC_543|>": 100840, + "<|LOC_544|>": 100841, + "<|LOC_545|>": 100842, + "<|LOC_546|>": 100843, + "<|LOC_547|>": 100844, + "<|LOC_548|>": 100845, + "<|LOC_549|>": 100846, + "<|LOC_54|>": 100351, + "<|LOC_550|>": 100847, + "<|LOC_551|>": 100848, + "<|LOC_552|>": 100849, + "<|LOC_553|>": 100850, + "<|LOC_554|>": 100851, + "<|LOC_555|>": 100852, + "<|LOC_556|>": 100853, + "<|LOC_557|>": 100854, + "<|LOC_558|>": 100855, + "<|LOC_559|>": 100856, + "<|LOC_55|>": 100352, + "<|LOC_560|>": 100857, + "<|LOC_561|>": 100858, + "<|LOC_562|>": 100859, + "<|LOC_563|>": 100860, + "<|LOC_564|>": 100861, + "<|LOC_565|>": 100862, + "<|LOC_566|>": 100863, + "<|LOC_567|>": 100864, + "<|LOC_568|>": 100865, + "<|LOC_569|>": 100866, + "<|LOC_56|>": 100353, + "<|LOC_570|>": 100867, + "<|LOC_571|>": 100868, + "<|LOC_572|>": 100869, + "<|LOC_573|>": 100870, + "<|LOC_574|>": 100871, + "<|LOC_575|>": 100872, + "<|LOC_576|>": 100873, + "<|LOC_577|>": 100874, + "<|LOC_578|>": 100875, + 
"<|LOC_579|>": 100876, + "<|LOC_57|>": 100354, + "<|LOC_580|>": 100877, + "<|LOC_581|>": 100878, + "<|LOC_582|>": 100879, + "<|LOC_583|>": 100880, + "<|LOC_584|>": 100881, + "<|LOC_585|>": 100882, + "<|LOC_586|>": 100883, + "<|LOC_587|>": 100884, + "<|LOC_588|>": 100885, + "<|LOC_589|>": 100886, + "<|LOC_58|>": 100355, + "<|LOC_590|>": 100887, + "<|LOC_591|>": 100888, + "<|LOC_592|>": 100889, + "<|LOC_593|>": 100890, + "<|LOC_594|>": 100891, + "<|LOC_595|>": 100892, + "<|LOC_596|>": 100893, + "<|LOC_597|>": 100894, + "<|LOC_598|>": 100895, + "<|LOC_599|>": 100896, + "<|LOC_59|>": 100356, + "<|LOC_5|>": 100302, + "<|LOC_600|>": 100897, + "<|LOC_601|>": 100898, + "<|LOC_602|>": 100899, + "<|LOC_603|>": 100900, + "<|LOC_604|>": 100901, + "<|LOC_605|>": 100902, + "<|LOC_606|>": 100903, + "<|LOC_607|>": 100904, + "<|LOC_608|>": 100905, + "<|LOC_609|>": 100906, + "<|LOC_60|>": 100357, + "<|LOC_610|>": 100907, + "<|LOC_611|>": 100908, + "<|LOC_612|>": 100909, + "<|LOC_613|>": 100910, + "<|LOC_614|>": 100911, + "<|LOC_615|>": 100912, + "<|LOC_616|>": 100913, + "<|LOC_617|>": 100914, + "<|LOC_618|>": 100915, + "<|LOC_619|>": 100916, + "<|LOC_61|>": 100358, + "<|LOC_620|>": 100917, + "<|LOC_621|>": 100918, + "<|LOC_622|>": 100919, + "<|LOC_623|>": 100920, + "<|LOC_624|>": 100921, + "<|LOC_625|>": 100922, + "<|LOC_626|>": 100923, + "<|LOC_627|>": 100924, + "<|LOC_628|>": 100925, + "<|LOC_629|>": 100926, + "<|LOC_62|>": 100359, + "<|LOC_630|>": 100927, + "<|LOC_631|>": 100928, + "<|LOC_632|>": 100929, + "<|LOC_633|>": 100930, + "<|LOC_634|>": 100931, + "<|LOC_635|>": 100932, + "<|LOC_636|>": 100933, + "<|LOC_637|>": 100934, + "<|LOC_638|>": 100935, + "<|LOC_639|>": 100936, + "<|LOC_63|>": 100360, + "<|LOC_640|>": 100937, + "<|LOC_641|>": 100938, + "<|LOC_642|>": 100939, + "<|LOC_643|>": 100940, + "<|LOC_644|>": 100941, + "<|LOC_645|>": 100942, + "<|LOC_646|>": 100943, + "<|LOC_647|>": 100944, + "<|LOC_648|>": 100945, + "<|LOC_649|>": 100946, + "<|LOC_64|>": 100361, + 
"<|LOC_650|>": 100947, + "<|LOC_651|>": 100948, + "<|LOC_652|>": 100949, + "<|LOC_653|>": 100950, + "<|LOC_654|>": 100951, + "<|LOC_655|>": 100952, + "<|LOC_656|>": 100953, + "<|LOC_657|>": 100954, + "<|LOC_658|>": 100955, + "<|LOC_659|>": 100956, + "<|LOC_65|>": 100362, + "<|LOC_660|>": 100957, + "<|LOC_661|>": 100958, + "<|LOC_662|>": 100959, + "<|LOC_663|>": 100960, + "<|LOC_664|>": 100961, + "<|LOC_665|>": 100962, + "<|LOC_666|>": 100963, + "<|LOC_667|>": 100964, + "<|LOC_668|>": 100965, + "<|LOC_669|>": 100966, + "<|LOC_66|>": 100363, + "<|LOC_670|>": 100967, + "<|LOC_671|>": 100968, + "<|LOC_672|>": 100969, + "<|LOC_673|>": 100970, + "<|LOC_674|>": 100971, + "<|LOC_675|>": 100972, + "<|LOC_676|>": 100973, + "<|LOC_677|>": 100974, + "<|LOC_678|>": 100975, + "<|LOC_679|>": 100976, + "<|LOC_67|>": 100364, + "<|LOC_680|>": 100977, + "<|LOC_681|>": 100978, + "<|LOC_682|>": 100979, + "<|LOC_683|>": 100980, + "<|LOC_684|>": 100981, + "<|LOC_685|>": 100982, + "<|LOC_686|>": 100983, + "<|LOC_687|>": 100984, + "<|LOC_688|>": 100985, + "<|LOC_689|>": 100986, + "<|LOC_68|>": 100365, + "<|LOC_690|>": 100987, + "<|LOC_691|>": 100988, + "<|LOC_692|>": 100989, + "<|LOC_693|>": 100990, + "<|LOC_694|>": 100991, + "<|LOC_695|>": 100992, + "<|LOC_696|>": 100993, + "<|LOC_697|>": 100994, + "<|LOC_698|>": 100995, + "<|LOC_699|>": 100996, + "<|LOC_69|>": 100366, + "<|LOC_6|>": 100303, + "<|LOC_700|>": 100997, + "<|LOC_701|>": 100998, + "<|LOC_702|>": 100999, + "<|LOC_703|>": 101000, + "<|LOC_704|>": 101001, + "<|LOC_705|>": 101002, + "<|LOC_706|>": 101003, + "<|LOC_707|>": 101004, + "<|LOC_708|>": 101005, + "<|LOC_709|>": 101006, + "<|LOC_70|>": 100367, + "<|LOC_710|>": 101007, + "<|LOC_711|>": 101008, + "<|LOC_712|>": 101009, + "<|LOC_713|>": 101010, + "<|LOC_714|>": 101011, + "<|LOC_715|>": 101012, + "<|LOC_716|>": 101013, + "<|LOC_717|>": 101014, + "<|LOC_718|>": 101015, + "<|LOC_719|>": 101016, + "<|LOC_71|>": 100368, + "<|LOC_720|>": 101017, + "<|LOC_721|>": 101018, + 
"<|LOC_722|>": 101019, + "<|LOC_723|>": 101020, + "<|LOC_724|>": 101021, + "<|LOC_725|>": 101022, + "<|LOC_726|>": 101023, + "<|LOC_727|>": 101024, + "<|LOC_728|>": 101025, + "<|LOC_729|>": 101026, + "<|LOC_72|>": 100369, + "<|LOC_730|>": 101027, + "<|LOC_731|>": 101028, + "<|LOC_732|>": 101029, + "<|LOC_733|>": 101030, + "<|LOC_734|>": 101031, + "<|LOC_735|>": 101032, + "<|LOC_736|>": 101033, + "<|LOC_737|>": 101034, + "<|LOC_738|>": 101035, + "<|LOC_739|>": 101036, + "<|LOC_73|>": 100370, + "<|LOC_740|>": 101037, + "<|LOC_741|>": 101038, + "<|LOC_742|>": 101039, + "<|LOC_743|>": 101040, + "<|LOC_744|>": 101041, + "<|LOC_745|>": 101042, + "<|LOC_746|>": 101043, + "<|LOC_747|>": 101044, + "<|LOC_748|>": 101045, + "<|LOC_749|>": 101046, + "<|LOC_74|>": 100371, + "<|LOC_750|>": 101047, + "<|LOC_751|>": 101048, + "<|LOC_752|>": 101049, + "<|LOC_753|>": 101050, + "<|LOC_754|>": 101051, + "<|LOC_755|>": 101052, + "<|LOC_756|>": 101053, + "<|LOC_757|>": 101054, + "<|LOC_758|>": 101055, + "<|LOC_759|>": 101056, + "<|LOC_75|>": 100372, + "<|LOC_760|>": 101057, + "<|LOC_761|>": 101058, + "<|LOC_762|>": 101059, + "<|LOC_763|>": 101060, + "<|LOC_764|>": 101061, + "<|LOC_765|>": 101062, + "<|LOC_766|>": 101063, + "<|LOC_767|>": 101064, + "<|LOC_768|>": 101065, + "<|LOC_769|>": 101066, + "<|LOC_76|>": 100373, + "<|LOC_770|>": 101067, + "<|LOC_771|>": 101068, + "<|LOC_772|>": 101069, + "<|LOC_773|>": 101070, + "<|LOC_774|>": 101071, + "<|LOC_775|>": 101072, + "<|LOC_776|>": 101073, + "<|LOC_777|>": 101074, + "<|LOC_778|>": 101075, + "<|LOC_779|>": 101076, + "<|LOC_77|>": 100374, + "<|LOC_780|>": 101077, + "<|LOC_781|>": 101078, + "<|LOC_782|>": 101079, + "<|LOC_783|>": 101080, + "<|LOC_784|>": 101081, + "<|LOC_785|>": 101082, + "<|LOC_786|>": 101083, + "<|LOC_787|>": 101084, + "<|LOC_788|>": 101085, + "<|LOC_789|>": 101086, + "<|LOC_78|>": 100375, + "<|LOC_790|>": 101087, + "<|LOC_791|>": 101088, + "<|LOC_792|>": 101089, + "<|LOC_793|>": 101090, + "<|LOC_794|>": 101091, + 
"<|LOC_795|>": 101092, + "<|LOC_796|>": 101093, + "<|LOC_797|>": 101094, + "<|LOC_798|>": 101095, + "<|LOC_799|>": 101096, + "<|LOC_79|>": 100376, + "<|LOC_7|>": 100304, + "<|LOC_800|>": 101097, + "<|LOC_801|>": 101098, + "<|LOC_802|>": 101099, + "<|LOC_803|>": 101100, + "<|LOC_804|>": 101101, + "<|LOC_805|>": 101102, + "<|LOC_806|>": 101103, + "<|LOC_807|>": 101104, + "<|LOC_808|>": 101105, + "<|LOC_809|>": 101106, + "<|LOC_80|>": 100377, + "<|LOC_810|>": 101107, + "<|LOC_811|>": 101108, + "<|LOC_812|>": 101109, + "<|LOC_813|>": 101110, + "<|LOC_814|>": 101111, + "<|LOC_815|>": 101112, + "<|LOC_816|>": 101113, + "<|LOC_817|>": 101114, + "<|LOC_818|>": 101115, + "<|LOC_819|>": 101116, + "<|LOC_81|>": 100378, + "<|LOC_820|>": 101117, + "<|LOC_821|>": 101118, + "<|LOC_822|>": 101119, + "<|LOC_823|>": 101120, + "<|LOC_824|>": 101121, + "<|LOC_825|>": 101122, + "<|LOC_826|>": 101123, + "<|LOC_827|>": 101124, + "<|LOC_828|>": 101125, + "<|LOC_829|>": 101126, + "<|LOC_82|>": 100379, + "<|LOC_830|>": 101127, + "<|LOC_831|>": 101128, + "<|LOC_832|>": 101129, + "<|LOC_833|>": 101130, + "<|LOC_834|>": 101131, + "<|LOC_835|>": 101132, + "<|LOC_836|>": 101133, + "<|LOC_837|>": 101134, + "<|LOC_838|>": 101135, + "<|LOC_839|>": 101136, + "<|LOC_83|>": 100380, + "<|LOC_840|>": 101137, + "<|LOC_841|>": 101138, + "<|LOC_842|>": 101139, + "<|LOC_843|>": 101140, + "<|LOC_844|>": 101141, + "<|LOC_845|>": 101142, + "<|LOC_846|>": 101143, + "<|LOC_847|>": 101144, + "<|LOC_848|>": 101145, + "<|LOC_849|>": 101146, + "<|LOC_84|>": 100381, + "<|LOC_850|>": 101147, + "<|LOC_851|>": 101148, + "<|LOC_852|>": 101149, + "<|LOC_853|>": 101150, + "<|LOC_854|>": 101151, + "<|LOC_855|>": 101152, + "<|LOC_856|>": 101153, + "<|LOC_857|>": 101154, + "<|LOC_858|>": 101155, + "<|LOC_859|>": 101156, + "<|LOC_85|>": 100382, + "<|LOC_860|>": 101157, + "<|LOC_861|>": 101158, + "<|LOC_862|>": 101159, + "<|LOC_863|>": 101160, + "<|LOC_864|>": 101161, + "<|LOC_865|>": 101162, + "<|LOC_866|>": 101163, + 
"<|LOC_867|>": 101164, + "<|LOC_868|>": 101165, + "<|LOC_869|>": 101166, + "<|LOC_86|>": 100383, + "<|LOC_870|>": 101167, + "<|LOC_871|>": 101168, + "<|LOC_872|>": 101169, + "<|LOC_873|>": 101170, + "<|LOC_874|>": 101171, + "<|LOC_875|>": 101172, + "<|LOC_876|>": 101173, + "<|LOC_877|>": 101174, + "<|LOC_878|>": 101175, + "<|LOC_879|>": 101176, + "<|LOC_87|>": 100384, + "<|LOC_880|>": 101177, + "<|LOC_881|>": 101178, + "<|LOC_882|>": 101179, + "<|LOC_883|>": 101180, + "<|LOC_884|>": 101181, + "<|LOC_885|>": 101182, + "<|LOC_886|>": 101183, + "<|LOC_887|>": 101184, + "<|LOC_888|>": 101185, + "<|LOC_889|>": 101186, + "<|LOC_88|>": 100385, + "<|LOC_890|>": 101187, + "<|LOC_891|>": 101188, + "<|LOC_892|>": 101189, + "<|LOC_893|>": 101190, + "<|LOC_894|>": 101191, + "<|LOC_895|>": 101192, + "<|LOC_896|>": 101193, + "<|LOC_897|>": 101194, + "<|LOC_898|>": 101195, + "<|LOC_899|>": 101196, + "<|LOC_89|>": 100386, + "<|LOC_8|>": 100305, + "<|LOC_900|>": 101197, + "<|LOC_901|>": 101198, + "<|LOC_902|>": 101199, + "<|LOC_903|>": 101200, + "<|LOC_904|>": 101201, + "<|LOC_905|>": 101202, + "<|LOC_906|>": 101203, + "<|LOC_907|>": 101204, + "<|LOC_908|>": 101205, + "<|LOC_909|>": 101206, + "<|LOC_90|>": 100387, + "<|LOC_910|>": 101207, + "<|LOC_911|>": 101208, + "<|LOC_912|>": 101209, + "<|LOC_913|>": 101210, + "<|LOC_914|>": 101211, + "<|LOC_915|>": 101212, + "<|LOC_916|>": 101213, + "<|LOC_917|>": 101214, + "<|LOC_918|>": 101215, + "<|LOC_919|>": 101216, + "<|LOC_91|>": 100388, + "<|LOC_920|>": 101217, + "<|LOC_921|>": 101218, + "<|LOC_922|>": 101219, + "<|LOC_923|>": 101220, + "<|LOC_924|>": 101221, + "<|LOC_925|>": 101222, + "<|LOC_926|>": 101223, + "<|LOC_927|>": 101224, + "<|LOC_928|>": 101225, + "<|LOC_929|>": 101226, + "<|LOC_92|>": 100389, + "<|LOC_930|>": 101227, + "<|LOC_931|>": 101228, + "<|LOC_932|>": 101229, + "<|LOC_933|>": 101230, + "<|LOC_934|>": 101231, + "<|LOC_935|>": 101232, + "<|LOC_936|>": 101233, + "<|LOC_937|>": 101234, + "<|LOC_938|>": 101235, + 
"<|LOC_939|>": 101236, + "<|LOC_93|>": 100390, + "<|LOC_940|>": 101237, + "<|LOC_941|>": 101238, + "<|LOC_942|>": 101239, + "<|LOC_943|>": 101240, + "<|LOC_944|>": 101241, + "<|LOC_945|>": 101242, + "<|LOC_946|>": 101243, + "<|LOC_947|>": 101244, + "<|LOC_948|>": 101245, + "<|LOC_949|>": 101246, + "<|LOC_94|>": 100391, + "<|LOC_950|>": 101247, + "<|LOC_951|>": 101248, + "<|LOC_952|>": 101249, + "<|LOC_953|>": 101250, + "<|LOC_954|>": 101251, + "<|LOC_955|>": 101252, + "<|LOC_956|>": 101253, + "<|LOC_957|>": 101254, + "<|LOC_958|>": 101255, + "<|LOC_959|>": 101256, + "<|LOC_95|>": 100392, + "<|LOC_960|>": 101257, + "<|LOC_961|>": 101258, + "<|LOC_962|>": 101259, + "<|LOC_963|>": 101260, + "<|LOC_964|>": 101261, + "<|LOC_965|>": 101262, + "<|LOC_966|>": 101263, + "<|LOC_967|>": 101264, + "<|LOC_968|>": 101265, + "<|LOC_969|>": 101266, + "<|LOC_96|>": 100393, + "<|LOC_970|>": 101267, + "<|LOC_971|>": 101268, + "<|LOC_972|>": 101269, + "<|LOC_973|>": 101270, + "<|LOC_974|>": 101271, + "<|LOC_975|>": 101272, + "<|LOC_976|>": 101273, + "<|LOC_977|>": 101274, + "<|LOC_978|>": 101275, + "<|LOC_979|>": 101276, + "<|LOC_97|>": 100394, + "<|LOC_980|>": 101277, + "<|LOC_981|>": 101278, + "<|LOC_982|>": 101279, + "<|LOC_983|>": 101280, + "<|LOC_984|>": 101281, + "<|LOC_985|>": 101282, + "<|LOC_986|>": 101283, + "<|LOC_987|>": 101284, + "<|LOC_988|>": 101285, + "<|LOC_989|>": 101286, + "<|LOC_98|>": 100395, + "<|LOC_990|>": 101287, + "<|LOC_991|>": 101288, + "<|LOC_992|>": 101289, + "<|LOC_993|>": 101290, + "<|LOC_994|>": 101291, + "<|LOC_995|>": 101292, + "<|LOC_996|>": 101293, + "<|LOC_997|>": 101294, + "<|LOC_998|>": 101295, + "<|LOC_999|>": 101296, + "<|LOC_99|>": 100396, + "<|LOC_9|>": 100306, + "<|LOC_BEGIN|>": 101298, + "<|LOC_END|>": 101299, + "<|LOC_SEP|>": 101300, + "<|image_pad|>": 101304, + "<|video_pad|>": 101307 +} diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 
0000000000000000000000000000000000000000..7b55077ebff1def62b6170843569332368291a88 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,43 @@ +{%- if not add_generation_prompt is defined -%} + {%- set add_generation_prompt = true -%} +{%- endif -%} +{%- if not cls_token is defined -%} + {%- set cls_token = "<|begin_of_sentence|>" -%} +{%- endif -%} +{%- if not eos_token is defined -%} + {%- set eos_token = "</s>" -%} +{%- endif -%} +{{- cls_token -}} +{%- for message in messages -%} + {%- if message["role"] == "user" -%} + {{- "User: " -}} + {%- for content in message["content"] -%} + {%- if content["type"] == "image" -%} + {{ "<|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>" }} + {%- endif -%} + {%- endfor -%} + {%- for content in message["content"] -%} + {%- if content["type"] == "text" -%} + {{ content["text"] }} + {%- endif -%} + {%- endfor -%} + {{ "\n" -}} + {%- elif message["role"] == "assistant" -%} + {{- "Assistant: " -}} + {%- for content in message["content"] -%} + {%- if content["type"] == "text" -%} + {{ content["text"] }} + {%- endif -%} + {%- endfor -%} + {{ eos_token -}} + {%- elif message["role"] == "system" -%} + {%- for content in message["content"] -%} + {%- if content["type"] == "text" -%} + {{ content["text"] + "\n" }} + {%- endif -%} + {%- endfor -%} + {%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + {{- "Assistant: " -}} +{%- endif -%} diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd44b0c90589e54ac65b3e63964b66d3e43625ce --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,207 @@ +--- +base_model: PaddlePaddle/PaddleOCR-VL +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:PaddlePaddle/PaddleOCR-VL +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information
Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37155570d2b4ded594fb262b1747669fed865f87 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "PaddlePaddle/PaddleOCR-VL", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + 
"o_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ca8921354f051ad248b681b0a26e5c7ee031e58 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a73d6cfd018136c60530533dcb016f4486cf11196570fe0763c6c57aeb88d3 +size 22015368 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a09cc0321a65f49e74c7cb1ce954bb548cc88eeb --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c3131ef16d8699fc7870b86cc5fe01a047c8d25e7a2b17177ca73b74909c78 +size 44203339 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d392cde05001ebae258caf1043da28070c8d61c --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654e1cc7522de529d47176a737ea3a777562cc6762ca913f2273f399034678c6 +size 14645 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf10fae5ba0c3d545e1b556f2426b08daaa95c88 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62507ca2bb4e90fa6aa37c59a93be971de3e2eefa2f166f6a96468ea87e0302c +size 1465 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..949ed5443df557231cd2971f0882574f934278ca --- /dev/null +++ 
b/checkpoint-100/trainer_state.json @@ -0,0 +1,104 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3552397868561279, + "eval_steps": 200, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.035523978685612786, + "grad_norm": 31.536340713500977, + "learning_rate": 1.5517241379310346e-05, + "loss": 10.1017, + "step": 10 + }, + { + "epoch": 0.07104795737122557, + "grad_norm": 10.592451095581055, + "learning_rate": 3.275862068965517e-05, + "loss": 9.1487, + "step": 20 + }, + { + "epoch": 0.10657193605683836, + "grad_norm": 8.758255004882812, + "learning_rate": 5e-05, + "loss": 7.7266, + "step": 30 + }, + { + "epoch": 0.14209591474245115, + "grad_norm": 8.455466270446777, + "learning_rate": 4.8023715415019764e-05, + "loss": 6.4992, + "step": 40 + }, + { + "epoch": 0.17761989342806395, + "grad_norm": 10.96068000793457, + "learning_rate": 4.6047430830039526e-05, + "loss": 5.3786, + "step": 50 + }, + { + "epoch": 0.21314387211367672, + "grad_norm": 10.333888053894043, + "learning_rate": 4.4071146245059295e-05, + "loss": 4.141, + "step": 60 + }, + { + "epoch": 0.24866785079928952, + "grad_norm": 12.409323692321777, + "learning_rate": 4.2094861660079056e-05, + "loss": 3.6644, + "step": 70 + }, + { + "epoch": 0.2841918294849023, + "grad_norm": 9.275422096252441, + "learning_rate": 4.011857707509882e-05, + "loss": 3.4415, + "step": 80 + }, + { + "epoch": 0.3197158081705151, + "grad_norm": 8.710288047790527, + "learning_rate": 3.814229249011858e-05, + "loss": 2.8615, + "step": 90 + }, + { + "epoch": 0.3552397868561279, + "grad_norm": 10.596097946166992, + "learning_rate": 3.616600790513834e-05, + "loss": 2.8226, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 282, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + 
"should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4036615557562368.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e688b1a0b10ac21ae9b4ff0f7c4c9bc35abe1a4 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa297d24b7259067297e2974f66954740991aaf0075fa5df2c91096c2a5aec35 +size 5713 diff --git a/checkpoint-150/README.md b/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd44b0c90589e54ac65b3e63964b66d3e43625ce --- /dev/null +++ b/checkpoint-150/README.md @@ -0,0 +1,207 @@ +--- +base_model: PaddlePaddle/PaddleOCR-VL +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:PaddlePaddle/PaddleOCR-VL +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### 
Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-150/adapter_config.json b/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37155570d2b4ded594fb262b1747669fed865f87 --- /dev/null +++ b/checkpoint-150/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "PaddlePaddle/PaddleOCR-VL", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + 
"o_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-150/adapter_model.safetensors b/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8aa0d4f85dcd65c09b46803ed732f35a667c1192 --- /dev/null +++ b/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952cab0bf4ca1bfa90913617790485b70df6055db8af0b5a1d4fdae05db193fc +size 22015368 diff --git a/checkpoint-150/optimizer.pt b/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6219ba613e5c475b9e49e8c7b92e8573578d07cc --- /dev/null +++ b/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfacee6e6a93fcc0dd2b33a9aad151a6c32cc2207b6654527ead8c0e03a62143 +size 44203339 diff --git a/checkpoint-150/rng_state.pth b/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..34a51259c4a2982e91cf06a3cdf021d11dc967cf --- /dev/null +++ b/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd076cfcf76ebedded3b7a28136bfbb12cd2d18451482fac5be8be9e14f590bb +size 14645 diff --git a/checkpoint-150/scheduler.pt b/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1e487fcd73a6edc76f9370f94cef83cdaa65dd4 --- /dev/null +++ b/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd538259e23f786cd5fda0fe864fe98766f1dbbcd5f188058099067bf92fb87 +size 1465 diff --git a/checkpoint-150/trainer_state.json b/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4acf78068d823393ae848a8704b9f68edc8bf909 --- /dev/null +++ 
b/checkpoint-150/trainer_state.json @@ -0,0 +1,139 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5328596802841918, + "eval_steps": 200, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.035523978685612786, + "grad_norm": 31.536340713500977, + "learning_rate": 1.5517241379310346e-05, + "loss": 10.1017, + "step": 10 + }, + { + "epoch": 0.07104795737122557, + "grad_norm": 10.592451095581055, + "learning_rate": 3.275862068965517e-05, + "loss": 9.1487, + "step": 20 + }, + { + "epoch": 0.10657193605683836, + "grad_norm": 8.758255004882812, + "learning_rate": 5e-05, + "loss": 7.7266, + "step": 30 + }, + { + "epoch": 0.14209591474245115, + "grad_norm": 8.455466270446777, + "learning_rate": 4.8023715415019764e-05, + "loss": 6.4992, + "step": 40 + }, + { + "epoch": 0.17761989342806395, + "grad_norm": 10.96068000793457, + "learning_rate": 4.6047430830039526e-05, + "loss": 5.3786, + "step": 50 + }, + { + "epoch": 0.21314387211367672, + "grad_norm": 10.333888053894043, + "learning_rate": 4.4071146245059295e-05, + "loss": 4.141, + "step": 60 + }, + { + "epoch": 0.24866785079928952, + "grad_norm": 12.409323692321777, + "learning_rate": 4.2094861660079056e-05, + "loss": 3.6644, + "step": 70 + }, + { + "epoch": 0.2841918294849023, + "grad_norm": 9.275422096252441, + "learning_rate": 4.011857707509882e-05, + "loss": 3.4415, + "step": 80 + }, + { + "epoch": 0.3197158081705151, + "grad_norm": 8.710288047790527, + "learning_rate": 3.814229249011858e-05, + "loss": 2.8615, + "step": 90 + }, + { + "epoch": 0.3552397868561279, + "grad_norm": 10.596097946166992, + "learning_rate": 3.616600790513834e-05, + "loss": 2.8226, + "step": 100 + }, + { + "epoch": 0.3907637655417407, + "grad_norm": 13.911111831665039, + "learning_rate": 3.418972332015811e-05, + "loss": 2.4511, + "step": 110 + }, + { + "epoch": 0.42628774422735344, + 
"grad_norm": 9.634197235107422, + "learning_rate": 3.221343873517787e-05, + "loss": 2.1104, + "step": 120 + }, + { + "epoch": 0.46181172291296624, + "grad_norm": 9.822806358337402, + "learning_rate": 3.0237154150197627e-05, + "loss": 2.1311, + "step": 130 + }, + { + "epoch": 0.49733570159857904, + "grad_norm": 10.18515682220459, + "learning_rate": 2.826086956521739e-05, + "loss": 1.6967, + "step": 140 + }, + { + "epoch": 0.5328596802841918, + "grad_norm": 8.908123970031738, + "learning_rate": 2.6284584980237154e-05, + "loss": 1.7048, + "step": 150 + } + ], + "logging_steps": 10, + "max_steps": 282, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6069085795418112.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-150/training_args.bin b/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e688b1a0b10ac21ae9b4ff0f7c4c9bc35abe1a4 --- /dev/null +++ b/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa297d24b7259067297e2974f66954740991aaf0075fa5df2c91096c2a5aec35 +size 5713 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd44b0c90589e54ac65b3e63964b66d3e43625ce --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,207 @@ +--- +base_model: PaddlePaddle/PaddleOCR-VL +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:PaddlePaddle/PaddleOCR-VL +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** 
[More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37155570d2b4ded594fb262b1747669fed865f87 --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "PaddlePaddle/PaddleOCR-VL", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + 
"o_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..280cf5b490d20db82706ddf852084bc3f116556d --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0501a76510ce4d761d514d4019941abfd9a34b18067725c2aed73919381ecab9 +size 22015368 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..69a55606c7440196bc49ff749fecd136d6795101 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68f83cbb42548be01bde9c5dc52bb978f53d64efa81059ee2bb7a3cbde9edb9 +size 44203339 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3eaeb9e545fee179ae82d921565670755b9cea6d --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc62724c5b4f7b95e502b0dc0575e36e4e56cdbcf38ea4bc606262cf95730221 +size 14645 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa4e7e0c422af8f6fdf1dfda251ed00e657fe064 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4fdca048c34d26be18ee8819953f2c37420fb658b783e1fe2d74447e4cc3ee +size 1465 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..71621e817376d9665173fe077689955d9ee33b80 --- /dev/null +++ 
b/checkpoint-200/trainer_state.json @@ -0,0 +1,182 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7104795737122558, + "eval_steps": 200, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.035523978685612786, + "grad_norm": 31.536340713500977, + "learning_rate": 1.5517241379310346e-05, + "loss": 10.1017, + "step": 10 + }, + { + "epoch": 0.07104795737122557, + "grad_norm": 10.592451095581055, + "learning_rate": 3.275862068965517e-05, + "loss": 9.1487, + "step": 20 + }, + { + "epoch": 0.10657193605683836, + "grad_norm": 8.758255004882812, + "learning_rate": 5e-05, + "loss": 7.7266, + "step": 30 + }, + { + "epoch": 0.14209591474245115, + "grad_norm": 8.455466270446777, + "learning_rate": 4.8023715415019764e-05, + "loss": 6.4992, + "step": 40 + }, + { + "epoch": 0.17761989342806395, + "grad_norm": 10.96068000793457, + "learning_rate": 4.6047430830039526e-05, + "loss": 5.3786, + "step": 50 + }, + { + "epoch": 0.21314387211367672, + "grad_norm": 10.333888053894043, + "learning_rate": 4.4071146245059295e-05, + "loss": 4.141, + "step": 60 + }, + { + "epoch": 0.24866785079928952, + "grad_norm": 12.409323692321777, + "learning_rate": 4.2094861660079056e-05, + "loss": 3.6644, + "step": 70 + }, + { + "epoch": 0.2841918294849023, + "grad_norm": 9.275422096252441, + "learning_rate": 4.011857707509882e-05, + "loss": 3.4415, + "step": 80 + }, + { + "epoch": 0.3197158081705151, + "grad_norm": 8.710288047790527, + "learning_rate": 3.814229249011858e-05, + "loss": 2.8615, + "step": 90 + }, + { + "epoch": 0.3552397868561279, + "grad_norm": 10.596097946166992, + "learning_rate": 3.616600790513834e-05, + "loss": 2.8226, + "step": 100 + }, + { + "epoch": 0.3907637655417407, + "grad_norm": 13.911111831665039, + "learning_rate": 3.418972332015811e-05, + "loss": 2.4511, + "step": 110 + }, + { + "epoch": 0.42628774422735344, + 
"grad_norm": 9.634197235107422, + "learning_rate": 3.221343873517787e-05, + "loss": 2.1104, + "step": 120 + }, + { + "epoch": 0.46181172291296624, + "grad_norm": 9.822806358337402, + "learning_rate": 3.0237154150197627e-05, + "loss": 2.1311, + "step": 130 + }, + { + "epoch": 0.49733570159857904, + "grad_norm": 10.18515682220459, + "learning_rate": 2.826086956521739e-05, + "loss": 1.6967, + "step": 140 + }, + { + "epoch": 0.5328596802841918, + "grad_norm": 8.908123970031738, + "learning_rate": 2.6284584980237154e-05, + "loss": 1.7048, + "step": 150 + }, + { + "epoch": 0.5683836589698046, + "grad_norm": 8.805821418762207, + "learning_rate": 2.430830039525692e-05, + "loss": 1.8322, + "step": 160 + }, + { + "epoch": 0.6039076376554174, + "grad_norm": 11.96939754486084, + "learning_rate": 2.233201581027668e-05, + "loss": 1.7147, + "step": 170 + }, + { + "epoch": 0.6394316163410302, + "grad_norm": 10.250484466552734, + "learning_rate": 2.0355731225296443e-05, + "loss": 1.4317, + "step": 180 + }, + { + "epoch": 0.6749555950266429, + "grad_norm": 8.897704124450684, + "learning_rate": 1.8379446640316205e-05, + "loss": 1.2914, + "step": 190 + }, + { + "epoch": 0.7104795737122558, + "grad_norm": 8.914377212524414, + "learning_rate": 1.640316205533597e-05, + "loss": 1.3641, + "step": 200 + }, + { + "epoch": 0.7104795737122558, + "eval_loss": 0.3791966140270233, + "eval_runtime": 9.4154, + "eval_samples_per_second": 53.104, + "eval_steps_per_second": 6.691, + "step": 200 + } + ], + "logging_steps": 10, + "max_steps": 282, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8104703246401536.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin 
b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e688b1a0b10ac21ae9b4ff0f7c4c9bc35abe1a4 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa297d24b7259067297e2974f66954740991aaf0075fa5df2c91096c2a5aec35 +size 5713 diff --git a/checkpoint-250/README.md b/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd44b0c90589e54ac65b3e63964b66d3e43625ce --- /dev/null +++ b/checkpoint-250/README.md @@ -0,0 +1,207 @@ +--- +base_model: PaddlePaddle/PaddleOCR-VL +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:PaddlePaddle/PaddleOCR-VL +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-250/adapter_config.json b/checkpoint-250/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..37155570d2b4ded594fb262b1747669fed865f87 --- /dev/null +++ b/checkpoint-250/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "PaddlePaddle/PaddleOCR-VL", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-250/adapter_model.safetensors b/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b90cf0cdac3e5fcb400e54cd5057e7c5f557a8ed --- /dev/null +++ b/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:472c62880dc071d4520ec93bc0afe62a37805843dd27780e13c5a5cdd86f6ad2 +size 22015368 diff --git a/checkpoint-250/optimizer.pt b/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8359be44af0e7839c3cb778a61dcd979c06d5554 --- /dev/null +++ b/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:856b64a1f5773ffd80074aba6b6e45a36f338d1ee4ae0d7dc429bf92e5a83bf8 +size 44203339 diff --git 
a/checkpoint-250/rng_state.pth b/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea5a768cdd97851d3f571f43fdd8f7b933d4ec3f --- /dev/null +++ b/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f68545dcf1ed9c65b7ba611fe68dec7b244d51a5ecd476c07c2b3a5f6484e75 +size 14645 diff --git a/checkpoint-250/scheduler.pt b/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f86710c2a68da96668bbbb439f3e9a196bad2ce1 --- /dev/null +++ b/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f41ee5fd9167c9f7579e4ca32ecb823274d21fd992a77af029ff3dffef2ab5 +size 1465 diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..08f75db51bc8d2d6cc86c39a70f1fa3468252e5d --- /dev/null +++ b/checkpoint-250/trainer_state.json @@ -0,0 +1,217 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8880994671403197, + "eval_steps": 200, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.035523978685612786, + "grad_norm": 31.536340713500977, + "learning_rate": 1.5517241379310346e-05, + "loss": 10.1017, + "step": 10 + }, + { + "epoch": 0.07104795737122557, + "grad_norm": 10.592451095581055, + "learning_rate": 3.275862068965517e-05, + "loss": 9.1487, + "step": 20 + }, + { + "epoch": 0.10657193605683836, + "grad_norm": 8.758255004882812, + "learning_rate": 5e-05, + "loss": 7.7266, + "step": 30 + }, + { + "epoch": 0.14209591474245115, + "grad_norm": 8.455466270446777, + "learning_rate": 4.8023715415019764e-05, + "loss": 6.4992, + "step": 40 + }, + { + "epoch": 0.17761989342806395, + "grad_norm": 10.96068000793457, + "learning_rate": 4.6047430830039526e-05, + "loss": 
5.3786, + "step": 50 + }, + { + "epoch": 0.21314387211367672, + "grad_norm": 10.333888053894043, + "learning_rate": 4.4071146245059295e-05, + "loss": 4.141, + "step": 60 + }, + { + "epoch": 0.24866785079928952, + "grad_norm": 12.409323692321777, + "learning_rate": 4.2094861660079056e-05, + "loss": 3.6644, + "step": 70 + }, + { + "epoch": 0.2841918294849023, + "grad_norm": 9.275422096252441, + "learning_rate": 4.011857707509882e-05, + "loss": 3.4415, + "step": 80 + }, + { + "epoch": 0.3197158081705151, + "grad_norm": 8.710288047790527, + "learning_rate": 3.814229249011858e-05, + "loss": 2.8615, + "step": 90 + }, + { + "epoch": 0.3552397868561279, + "grad_norm": 10.596097946166992, + "learning_rate": 3.616600790513834e-05, + "loss": 2.8226, + "step": 100 + }, + { + "epoch": 0.3907637655417407, + "grad_norm": 13.911111831665039, + "learning_rate": 3.418972332015811e-05, + "loss": 2.4511, + "step": 110 + }, + { + "epoch": 0.42628774422735344, + "grad_norm": 9.634197235107422, + "learning_rate": 3.221343873517787e-05, + "loss": 2.1104, + "step": 120 + }, + { + "epoch": 0.46181172291296624, + "grad_norm": 9.822806358337402, + "learning_rate": 3.0237154150197627e-05, + "loss": 2.1311, + "step": 130 + }, + { + "epoch": 0.49733570159857904, + "grad_norm": 10.18515682220459, + "learning_rate": 2.826086956521739e-05, + "loss": 1.6967, + "step": 140 + }, + { + "epoch": 0.5328596802841918, + "grad_norm": 8.908123970031738, + "learning_rate": 2.6284584980237154e-05, + "loss": 1.7048, + "step": 150 + }, + { + "epoch": 0.5683836589698046, + "grad_norm": 8.805821418762207, + "learning_rate": 2.430830039525692e-05, + "loss": 1.8322, + "step": 160 + }, + { + "epoch": 0.6039076376554174, + "grad_norm": 11.96939754486084, + "learning_rate": 2.233201581027668e-05, + "loss": 1.7147, + "step": 170 + }, + { + "epoch": 0.6394316163410302, + "grad_norm": 10.250484466552734, + "learning_rate": 2.0355731225296443e-05, + "loss": 1.4317, + "step": 180 + }, + { + "epoch": 0.6749555950266429, + 
"grad_norm": 8.897704124450684, + "learning_rate": 1.8379446640316205e-05, + "loss": 1.2914, + "step": 190 + }, + { + "epoch": 0.7104795737122558, + "grad_norm": 8.914377212524414, + "learning_rate": 1.640316205533597e-05, + "loss": 1.3641, + "step": 200 + }, + { + "epoch": 0.7104795737122558, + "eval_loss": 0.3791966140270233, + "eval_runtime": 9.4154, + "eval_samples_per_second": 53.104, + "eval_steps_per_second": 6.691, + "step": 200 + }, + { + "epoch": 0.7460035523978685, + "grad_norm": 9.695067405700684, + "learning_rate": 1.4426877470355732e-05, + "loss": 1.3576, + "step": 210 + }, + { + "epoch": 0.7815275310834814, + "grad_norm": 11.284568786621094, + "learning_rate": 1.2450592885375495e-05, + "loss": 1.4808, + "step": 220 + }, + { + "epoch": 0.8170515097690941, + "grad_norm": 9.005117416381836, + "learning_rate": 1.0474308300395258e-05, + "loss": 1.3842, + "step": 230 + }, + { + "epoch": 0.8525754884547069, + "grad_norm": 9.499415397644043, + "learning_rate": 8.49802371541502e-06, + "loss": 1.34, + "step": 240 + }, + { + "epoch": 0.8880994671403197, + "grad_norm": 9.747414588928223, + "learning_rate": 6.521739130434783e-06, + "loss": 1.3088, + "step": 250 + } + ], + "logging_steps": 10, + "max_steps": 282, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0132767385878528e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e688b1a0b10ac21ae9b4ff0f7c4c9bc35abe1a4 --- /dev/null +++ b/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fa297d24b7259067297e2974f66954740991aaf0075fa5df2c91096c2a5aec35 +size 5713 diff --git a/checkpoint-282/README.md b/checkpoint-282/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd44b0c90589e54ac65b3e63964b66d3e43625ce --- /dev/null +++ b/checkpoint-282/README.md @@ -0,0 +1,207 @@ +--- +base_model: PaddlePaddle/PaddleOCR-VL +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:PaddlePaddle/PaddleOCR-VL +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-282/adapter_config.json b/checkpoint-282/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..37155570d2b4ded594fb262b1747669fed865f87 --- /dev/null +++ b/checkpoint-282/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "PaddlePaddle/PaddleOCR-VL", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-282/adapter_model.safetensors b/checkpoint-282/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efcb2b31f4f55511067774f0f0b9ca3c9fd6d88a --- /dev/null +++ b/checkpoint-282/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fce6e77d1626eb47534f1ca676573f13c1a976460539eeb104c3f3b56bbae7b +size 22015368 diff --git a/checkpoint-282/optimizer.pt b/checkpoint-282/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..18db5fbe4a7b597b8014be31ec947c1e008c3c43 --- /dev/null +++ b/checkpoint-282/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f694da218484938be3cf15eec72aeb982c8aed5fba0dd61f554e12258efc9c40 +size 44203339 diff --git 
a/checkpoint-282/rng_state.pth b/checkpoint-282/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6c4747ee622adafc4380940a866bbebcd7ae0b1 --- /dev/null +++ b/checkpoint-282/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb915fbe7eac386a43c9f3f3d5324f3401d5d2fdff998824da1e07604ffb5731 +size 14645 diff --git a/checkpoint-282/scheduler.pt b/checkpoint-282/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..424c6b5b7e0f23dffb61cab2ca5cbed260295f73 --- /dev/null +++ b/checkpoint-282/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67447343d7003345a454a5a9fc74348cb0fc0b0680a0eb187016577663350f6b +size 1465 diff --git a/checkpoint-282/trainer_state.json b/checkpoint-282/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d93ea264ced4e96bf30a79a6314c238b5e1c51b --- /dev/null +++ b/checkpoint-282/trainer_state.json @@ -0,0 +1,238 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 282, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.035523978685612786, + "grad_norm": 31.536340713500977, + "learning_rate": 1.5517241379310346e-05, + "loss": 10.1017, + "step": 10 + }, + { + "epoch": 0.07104795737122557, + "grad_norm": 10.592451095581055, + "learning_rate": 3.275862068965517e-05, + "loss": 9.1487, + "step": 20 + }, + { + "epoch": 0.10657193605683836, + "grad_norm": 8.758255004882812, + "learning_rate": 5e-05, + "loss": 7.7266, + "step": 30 + }, + { + "epoch": 0.14209591474245115, + "grad_norm": 8.455466270446777, + "learning_rate": 4.8023715415019764e-05, + "loss": 6.4992, + "step": 40 + }, + { + "epoch": 0.17761989342806395, + "grad_norm": 10.96068000793457, + "learning_rate": 4.6047430830039526e-05, + "loss": 5.3786, + "step": 
50 + }, + { + "epoch": 0.21314387211367672, + "grad_norm": 10.333888053894043, + "learning_rate": 4.4071146245059295e-05, + "loss": 4.141, + "step": 60 + }, + { + "epoch": 0.24866785079928952, + "grad_norm": 12.409323692321777, + "learning_rate": 4.2094861660079056e-05, + "loss": 3.6644, + "step": 70 + }, + { + "epoch": 0.2841918294849023, + "grad_norm": 9.275422096252441, + "learning_rate": 4.011857707509882e-05, + "loss": 3.4415, + "step": 80 + }, + { + "epoch": 0.3197158081705151, + "grad_norm": 8.710288047790527, + "learning_rate": 3.814229249011858e-05, + "loss": 2.8615, + "step": 90 + }, + { + "epoch": 0.3552397868561279, + "grad_norm": 10.596097946166992, + "learning_rate": 3.616600790513834e-05, + "loss": 2.8226, + "step": 100 + }, + { + "epoch": 0.3907637655417407, + "grad_norm": 13.911111831665039, + "learning_rate": 3.418972332015811e-05, + "loss": 2.4511, + "step": 110 + }, + { + "epoch": 0.42628774422735344, + "grad_norm": 9.634197235107422, + "learning_rate": 3.221343873517787e-05, + "loss": 2.1104, + "step": 120 + }, + { + "epoch": 0.46181172291296624, + "grad_norm": 9.822806358337402, + "learning_rate": 3.0237154150197627e-05, + "loss": 2.1311, + "step": 130 + }, + { + "epoch": 0.49733570159857904, + "grad_norm": 10.18515682220459, + "learning_rate": 2.826086956521739e-05, + "loss": 1.6967, + "step": 140 + }, + { + "epoch": 0.5328596802841918, + "grad_norm": 8.908123970031738, + "learning_rate": 2.6284584980237154e-05, + "loss": 1.7048, + "step": 150 + }, + { + "epoch": 0.5683836589698046, + "grad_norm": 8.805821418762207, + "learning_rate": 2.430830039525692e-05, + "loss": 1.8322, + "step": 160 + }, + { + "epoch": 0.6039076376554174, + "grad_norm": 11.96939754486084, + "learning_rate": 2.233201581027668e-05, + "loss": 1.7147, + "step": 170 + }, + { + "epoch": 0.6394316163410302, + "grad_norm": 10.250484466552734, + "learning_rate": 2.0355731225296443e-05, + "loss": 1.4317, + "step": 180 + }, + { + "epoch": 0.6749555950266429, + "grad_norm": 
8.897704124450684, + "learning_rate": 1.8379446640316205e-05, + "loss": 1.2914, + "step": 190 + }, + { + "epoch": 0.7104795737122558, + "grad_norm": 8.914377212524414, + "learning_rate": 1.640316205533597e-05, + "loss": 1.3641, + "step": 200 + }, + { + "epoch": 0.7104795737122558, + "eval_loss": 0.3791966140270233, + "eval_runtime": 9.4154, + "eval_samples_per_second": 53.104, + "eval_steps_per_second": 6.691, + "step": 200 + }, + { + "epoch": 0.7460035523978685, + "grad_norm": 9.695067405700684, + "learning_rate": 1.4426877470355732e-05, + "loss": 1.3576, + "step": 210 + }, + { + "epoch": 0.7815275310834814, + "grad_norm": 11.284568786621094, + "learning_rate": 1.2450592885375495e-05, + "loss": 1.4808, + "step": 220 + }, + { + "epoch": 0.8170515097690941, + "grad_norm": 9.005117416381836, + "learning_rate": 1.0474308300395258e-05, + "loss": 1.3842, + "step": 230 + }, + { + "epoch": 0.8525754884547069, + "grad_norm": 9.499415397644043, + "learning_rate": 8.49802371541502e-06, + "loss": 1.34, + "step": 240 + }, + { + "epoch": 0.8880994671403197, + "grad_norm": 9.747414588928223, + "learning_rate": 6.521739130434783e-06, + "loss": 1.3088, + "step": 250 + }, + { + "epoch": 0.9236234458259325, + "grad_norm": 9.608942985534668, + "learning_rate": 4.5454545454545455e-06, + "loss": 1.2627, + "step": 260 + }, + { + "epoch": 0.9591474245115453, + "grad_norm": 14.37006664276123, + "learning_rate": 2.5691699604743086e-06, + "loss": 1.3651, + "step": 270 + }, + { + "epoch": 0.9946714031971581, + "grad_norm": 8.14010238647461, + "learning_rate": 5.928853754940711e-07, + "loss": 1.221, + "step": 280 + } + ], + "logging_steps": 10, + "max_steps": 282, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 
1.140392676814848e+16, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-282/training_args.bin b/checkpoint-282/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e688b1a0b10ac21ae9b4ff0f7c4c9bc35abe1a4 --- /dev/null +++ b/checkpoint-282/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa297d24b7259067297e2974f66954740991aaf0075fa5df2c91096c2a5aec35 +size 5713 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd44b0c90589e54ac65b3e63964b66d3e43625ce --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,207 @@ +--- +base_model: PaddlePaddle/PaddleOCR-VL +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:PaddlePaddle/PaddleOCR-VL +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. 
+ +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..37155570d2b4ded594fb262b1747669fed865f87 --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "PaddlePaddle/PaddleOCR-VL", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + 
"o_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b195711e5d27a18866a6cfba4a99793ccf563b9a --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a555e1b383dbe7e1c8766290deea5f0f8f8a627a344bb428e8ef9a7593814390 +size 22015368 diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..73649255e4ad7794d93d6f6d772d6c430193d06f --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521fdd438eeadbea7a0bed123498ae977562056c65b0f070585694e9231d1021 +size 44203339 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..17081b0efa6ea457ef4dabe0ef16b5ce8d89db03 --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65a2cd083c5fc52f7a40478cbf829deb261bc34c95a27394193601cd8aea76e +size 14645 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2cecb0467688fec483abf964f8a0671c10215aa --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0eeb0305a8ff2a5292f27e705fd389b9dad63661d0157bc1f10be36ae926915 +size 1465 diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..88288f08221b8e13ff586c01e9ad7218241709b6 --- /dev/null +++ 
b/checkpoint-50/trainer_state.json @@ -0,0 +1,69 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.17761989342806395, + "eval_steps": 200, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.035523978685612786, + "grad_norm": 31.536340713500977, + "learning_rate": 1.5517241379310346e-05, + "loss": 10.1017, + "step": 10 + }, + { + "epoch": 0.07104795737122557, + "grad_norm": 10.592451095581055, + "learning_rate": 3.275862068965517e-05, + "loss": 9.1487, + "step": 20 + }, + { + "epoch": 0.10657193605683836, + "grad_norm": 8.758255004882812, + "learning_rate": 5e-05, + "loss": 7.7266, + "step": 30 + }, + { + "epoch": 0.14209591474245115, + "grad_norm": 8.455466270446777, + "learning_rate": 4.8023715415019764e-05, + "loss": 6.4992, + "step": 40 + }, + { + "epoch": 0.17761989342806395, + "grad_norm": 10.96068000793457, + "learning_rate": 4.6047430830039526e-05, + "loss": 5.3786, + "step": 50 + } + ], + "logging_steps": 10, + "max_steps": 282, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2018622500093952.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e688b1a0b10ac21ae9b4ff0f7c4c9bc35abe1a4 --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa297d24b7259067297e2974f66954740991aaf0075fa5df2c91096c2a5aec35 +size 5713 diff --git a/image_processing_paddleocr_vl.py b/image_processing_paddleocr_vl.py new 
file mode 100644 index 0000000000000000000000000000000000000000..7e1d50a81f0bd0b59bf68a78b2f81c117f1ccf00 --- /dev/null +++ b/image_processing_paddleocr_vl.py @@ -0,0 +1,569 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Image processor class for PaddleOCR-VL.""" + +import math +from typing import Dict, List, Optional, Union + +import numpy as np +import torch +from transformers.image_processing_utils import BaseImageProcessor, BatchFeature +from torchvision.transforms import functional as TF +from transformers.image_transforms import ( + convert_to_rgb, + resize, + to_channel_dimension_format, +) +from transformers.image_utils import ( + OPENAI_CLIP_MEAN, + OPENAI_CLIP_STD, + ChannelDimension, + PILImageResampling, + get_image_size, + infer_channel_dimension_format, + is_scaled_image, + is_valid_image, + make_list_of_images, + to_numpy_array, + valid_images, + validate_preprocess_arguments, +) +from transformers.utils import TensorType, is_vision_available, logging + + +logger = logging.get_logger(__name__) + + +if is_vision_available(): + from PIL import Image + +ImageInput = Union[ + "PIL.Image.Image", + np.ndarray, + "torch.Tensor", + List["PIL.Image.Image"], + List[np.ndarray], + List["torch.Tensor"], +] # noqa + + +VideoInput = Union[ + List["PIL.Image.Image"], + "np.ndarray", + "torch.Tensor", + List["np.ndarray"], + List["torch.Tensor"], + List[List["PIL.Image.Image"]], + 
List[List["np.ndarrray"]], + List[List["torch.Tensor"]], +] # noqa + + +def make_batched_images(images) -> List[List[ImageInput]]: + """ + Accepts images in list or nested list format, and makes a list of images for preprocessing. + + Args: + images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`): + The input image. + + Returns: + list: A list of images. + """ + if ( + isinstance(images, (list, tuple)) + and isinstance(images[0], (list, tuple)) + and is_valid_image(images[0][0]) + ): + return [img for img_list in images for img in img_list] + + elif isinstance(images, (list, tuple)) and is_valid_image(images[0]): + return images + + elif is_valid_image(images): + return [images] + + raise ValueError(f"Could not make batched images from {images}") + + +def adjust_size(size, patch_size): + num_patches = size // patch_size + if num_patches % 2 != 0: # 如果是奇数,减1 + num_patches -= 1 + return num_patches * patch_size + + +def make_batched_videos(videos) -> List[VideoInput]: + if ( + isinstance(videos, (list, tuple)) + and isinstance(videos[0], (list, tuple)) + and is_valid_image(videos[0][0]) + ): + return videos + + elif isinstance(videos, (list, tuple)) and is_valid_image(videos[0]): + if isinstance(videos[0], Image.Image): + return [videos] + elif len(videos[0].shape) == 4: + return [list(video) for video in videos] + + elif is_valid_image(videos) and len(videos.shape) == 4: + return [list(videos)] + + raise ValueError(f"Could not make batched video from {videos}") + + +def smart_resize( + height: int, + width: int, + factor: int = 28, + min_pixels: int = 28 * 28 * 130, + max_pixels: int = 28 * 28 * 1280, +): + """Rescales the image so that the following conditions are met: + + 1. Both dimensions (height and width) are divisible by 'factor'. + + 2. The total number of pixels is within the range ['min_pixels', 'max_pixels']. + + 3. The aspect ratio of the image is maintained as closely as possible. 
+ + """ + # if height < factor or width < factor: + # raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}") + # if int(height < factor//4) + int(width < factor//4): + # raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor//4}") + + if height < factor: + print(f"smart_resize: height={height} < factor={factor}, reset height=factor") + width = round((width * factor) / height) + height = factor + + if width < factor: + print(f"smart_resize: width={width} < factor={factor}, reset width=factor") + height = round((height * factor) / width) + width = factor + + if max(height, width) / min(height, width) > 200: + raise ValueError( + f"absolute aspect ratio must be smaller than 200, got {max(height, width) / min(height, width)}" + ) + h_bar = round(height / factor) * factor + w_bar = round(width / factor) * factor + if h_bar * w_bar > max_pixels: + beta = math.sqrt((height * width) / max_pixels) + h_bar = math.floor(height / beta / factor) * factor + w_bar = math.floor(width / beta / factor) * factor + elif h_bar * w_bar < min_pixels: + beta = math.sqrt(min_pixels / (height * width)) + h_bar = math.ceil(height * beta / factor) * factor + w_bar = math.ceil(width * beta / factor) * factor + return h_bar, w_bar + + +class PaddleOCRVLImageProcessor(BaseImageProcessor): + r""" + Constructs a Siglip image processor that dynamically resizes images based on the original images. + + Args: + do_resize (`bool`, *optional*, defaults to `True`): + Whether to resize the image's (height, width) dimensions. + resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`): + Resampling filter to use when resizing the image. + do_rescale (`bool`, *optional*, defaults to `True`): + Whether to rescale the image by the specified scale `rescale_factor`. + rescale_factor (`int` or `float`, *optional*, defaults to `1/255`): + Scale factor to use if rescaling the image. 
+ do_normalize (`bool`, *optional*, defaults to `True`): + Whether to normalize the image. + image_mean (`float` or `List[float]`, *optional*, defaults to `[0.48145466, 0.4578275, 0.40821073]`): + Mean to use if normalizing the image. This is a float or list of floats for each channel in the image. + image_std (`float` or `List[float]`, *optional*, defaults to `[0.26862954, 0.26130258, 0.27577711]`): + Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image. + do_convert_rgb (`bool`, *optional*, defaults to `True`): + Whether to convert the image to RGB. + min_pixels (`int`, *optional*, defaults to `28 * 28 * 130`): + The min pixels of the image to resize the image. + max_pixels (`int`, *optional*, defaults to `28 * 28 * 1670`): + The max pixels of the image to resize the image. + patch_size (`int`, *optional*, defaults to 14): + The spacial patch size of the vision encoder. + temporal_patch_size (`int`, *optional*, defaults to 2): + The temporal patch size of the vision encoder. + merge_size (`int`, *optional*, defaults to 2): + The merge size of the vision encoder to llm encoder. 
+ """ + + model_input_names = [ + "pixel_values", + "image_grid_thw", + "pixel_values_videos", + "video_grid_thw", + ] + + def __init__( + self, + do_resize: bool = True, + resample: PILImageResampling = PILImageResampling.BICUBIC, + do_rescale: bool = True, + rescale_factor: Union[int, float] = 1 / 255, + do_normalize: bool = True, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = True, + min_pixels: int = 28 * 28 * 130, + max_pixels: int = 28 * 28 * 1280, + patch_size: int = 14, + temporal_patch_size: int = 1, + merge_size: int = 2, + **kwargs, + ) -> None: + super().__init__(**kwargs) + self.do_resize = do_resize + self.resample = resample + self.do_rescale = do_rescale + self.rescale_factor = rescale_factor + self.do_normalize = do_normalize + self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN + self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD + self.min_pixels = min_pixels + self.max_pixels = max_pixels + self.patch_size = patch_size + self.temporal_patch_size = temporal_patch_size + self.merge_size = merge_size + self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels} # not used + self.do_convert_rgb = do_convert_rgb + + def mvit_rescale(self, image: Image.Image, merge_size: int = 2) -> Image.Image: + try: + w, h = image.size + except: + raise ValueError(str((type(image), image))) + patch_size = self.patch_size + + if (w // patch_size) * (h // patch_size) > self.in_token_limit: + scale = math.sqrt( + self.in_token_limit / ((w // patch_size) * (h // patch_size)) + ) + new_w, new_h = int(w * scale), int(h * scale) + + image = image.resize((new_w, new_h), Image.Resampling.BICUBIC) + if self.pad_input: + new_w, new_h = image.size + pad_size_h = merge_size * patch_size + pad_size_w = merge_size * patch_size + + pad_h = (pad_size_h - new_h % pad_size_h) % pad_size_h + pad_w = (pad_size_w - new_w % pad_size_w) % 
pad_size_w + + image = TF.pad(image, (0, 0, pad_w, pad_h)) + else: + new_w, new_h = image.size + new_w = new_w - new_w % patch_size + new_h = new_h - new_h % patch_size + + new_w = adjust_size(new_w, patch_size) + new_h = adjust_size(new_h, patch_size) + + image = TF.center_crop(image, (new_h, new_w)) + + w, h = image.size + if w // patch_size >= 512 or h // patch_size >= 512: + new_h = min(patch_size * 510, h) + new_w = min(patch_size * 510, w) + image = TF.center_crop(image, (new_h, new_w)) + # raise ValueError("Exceed pos emb") + return image + + def _preprocess( + self, + images: Union[ImageInput, VideoInput], + do_resize: bool = None, + resample: PILImageResampling = None, + do_rescale: bool = None, + rescale_factor: float = None, + do_normalize: bool = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = None, + data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + ): + """ + Preprocess an image or batch of images. Copy of the `preprocess` method from `CLIPImageProcessor`. + + Args: + images (`ImageInput`): + Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255. If pixel values range from 0 to 1, set `do_rescale=False`. + vision_info (`List[Dict]`, *optional*): + Optional list of dictionaries containing additional information about vision inputs. + do_resize (`bool`, *optional*, defaults to `self.do_resize`): + Whether to resize the image. + resample (`PILImageResampling`, *optional*, defaults to `self.resample`): + Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` enums. + do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): + Whether to rescale the image. + rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): + Scale factor to use if rescaling the image. 
+ do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): + Whether to normalize the image. + image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): + Mean to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image. + image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): + Standard deviation to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image. + do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + Whether to convert the image to RGB. + data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`): + The channel dimension format for the output image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - Unset: Use the channel dimension format of the input image. + input_data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the input image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. + """ + images = make_list_of_images(images) + + if do_convert_rgb: + images = [convert_to_rgb(image) for image in images] + + # All transformations expect numpy arrays. + images = [to_numpy_array(image) for image in images] + + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. 
If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: + # We assume that all images have the same channel dimension format. + input_data_format = infer_channel_dimension_format(images[0]) + + height, width = get_image_size(images[0], channel_dim=input_data_format) + resized_height, resized_width = height, width + processed_images = [] + + for image in images: + if do_resize: + resized_height, resized_width = smart_resize( + height, + width, + factor=self.patch_size * self.merge_size, + min_pixels=self.min_pixels, + max_pixels=self.max_pixels, + ) + image = resize( + image, + size=(resized_height, resized_width), + resample=resample, + input_data_format=input_data_format, + ) + + if do_rescale: + image = self.rescale( + image, scale=rescale_factor, input_data_format=input_data_format + ) + + if do_normalize: + image = self.normalize( + image=image, + mean=image_mean, + std=image_std, + input_data_format=input_data_format, + ) + image = to_channel_dimension_format( + image, data_format, input_channel_dim=input_data_format + ) + processed_images.append(image) + + patches = np.array(processed_images) + if data_format == ChannelDimension.LAST: + patches = patches.transpose(0, 3, 1, 2) + if patches.shape[0] == 1: + patches = np.tile(patches, (self.temporal_patch_size, 1, 1, 1)) + init_patches = patches + channel = patches.shape[1] + grid_t = patches.shape[0] // self.temporal_patch_size + grid_h, grid_w = ( + resized_height // self.patch_size, + resized_width // self.patch_size, + ) + patches = patches.reshape( + grid_t, + self.temporal_patch_size, + channel, + grid_h, + self.patch_size, + grid_w, + self.patch_size, + ) + patches = patches.transpose(0, 3, 5, 2, 1, 4, 6) + assert self.temporal_patch_size == 1 + flatten_patches = patches.reshape( + grid_t * grid_h * grid_w, channel, self.patch_size, self.patch_size + ) + return flatten_patches, (grid_t, grid_h, grid_w) + + def 
preprocess( + self, + images: ImageInput, + videos: VideoInput = None, + do_resize: bool = None, + size: Dict[str, int] = None, + resample: PILImageResampling = None, + do_rescale: bool = None, + rescale_factor: float = None, + do_normalize: bool = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = None, + return_tensors: Optional[Union[str, TensorType]] = None, + data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + ): + """ + Args: + images (`ImageInput`): + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. + videos (`VideoInput`): + Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If + passing in videos with pixel values between 0 and 1, set `do_rescale=False`. + do_resize (`bool`, *optional*, defaults to `self.do_resize`): + Whether to resize the image. + size (`Dict[str, int]`, *optional*, defaults to `self.size`): + Size of the image after resizing. Shortest edge of the image is resized to size["shortest_edge"], with + the longest edge resized to keep the input aspect ratio. + resample (`int`, *optional*, defaults to `self.resample`): + Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only + has an effect if `do_resize` is set to `True`. + do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): + Whether to rescale the image. + rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): + Rescale factor to rescale the image by if `do_rescale` is set to `True`. + do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): + Whether to normalize the image. 
+ image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): + Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`. + image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): + Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to + `True`. + do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + Whether to convert the image to RGB. + return_tensors (`str` or `TensorType`, *optional*): + The type of tensors to return. Can be one of: + - Unset: Return a list of `np.ndarray`. + - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`. + - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`. + - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`. + - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`. + data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`): + The channel dimension format for the output image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - Unset: Use the channel dimension format of the input image. + input_data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the input image. If unset, the channel dimension format is inferred + from the input image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. 
+ + """ + do_resize = do_resize if do_resize is not None else self.do_resize + size = size if size is not None else self.size + resample = resample if resample is not None else self.resample + do_rescale = do_rescale if do_rescale is not None else self.do_rescale + rescale_factor = ( + rescale_factor if rescale_factor is not None else self.rescale_factor + ) + do_normalize = do_normalize if do_normalize is not None else self.do_normalize + image_mean = image_mean if image_mean is not None else self.image_mean + image_std = image_std if image_std is not None else self.image_std + do_convert_rgb = ( + do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb + ) + + if images is not None: + images = make_batched_images(images) + if videos is not None: + videos = make_batched_videos(videos) + + if images is not None and not valid_images(images): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " + "torch.Tensor, tf.Tensor or jax.ndarray." 
+ ) + + validate_preprocess_arguments( + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + + if images is not None: + pixel_values, vision_grid_thws = [], [] + for image in images: + patches, image_grid_thw = self._preprocess( + image, + do_resize=do_resize, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + data_format=data_format, + do_convert_rgb=do_convert_rgb, + input_data_format=input_data_format, + ) + pixel_values.extend(patches) + vision_grid_thws.append(image_grid_thw) + pixel_values = np.array(pixel_values) + vision_grid_thws = np.array(vision_grid_thws) + data = {"pixel_values": pixel_values, "image_grid_thw": vision_grid_thws} + + if videos is not None: + pixel_values, vision_grid_thws = [], [] + for images in videos: + patches, video_grid_thw = self._preprocess( + images, + do_resize=do_resize, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + data_format=data_format, + do_convert_rgb=do_convert_rgb, + input_data_format=input_data_format, + ) + pixel_values.extend(patches) + vision_grid_thws.append(video_grid_thw) + pixel_values = np.array(pixel_values) + vision_grid_thws = np.array(vision_grid_thws) + data = { + "pixel_values_videos": pixel_values, + "video_grid_thw": vision_grid_thws, + } + + return BatchFeature(data=data, tensor_type=return_tensors) diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6526d505dd79cbe9bcfbff4ec1ba74aa8ad91eb1 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,33 @@ +{ + "auto_map": { + "AutoImageProcessor": "image_processing_paddleocr_vl.PaddleOCRVLImageProcessor", + "AutoProcessor": 
"processing_paddleocr_vl.PaddleOCRVLProcessor" + }, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.5, + 0.5, + 0.5 + ], + "image_processor_type": "PaddleOCRVLImageProcessor", + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "max_pixels": 2822400, + "merge_size": 2, + "min_pixels": 147384, + "patch_size": 14, + "processor_class": "PaddleOCRVLProcessor", + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "max_pixels": 2822400, + "min_pixels": 147384 + }, + "temporal_patch_size": 1 +} diff --git a/processing_paddleocr_vl.py b/processing_paddleocr_vl.py new file mode 100644 index 0000000000000000000000000000000000000000..73c3faeff201555fc7b52709848e3c669419dbb1 --- /dev/null +++ b/processing_paddleocr_vl.py @@ -0,0 +1,293 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import List, Union +import numpy as np +import torch +from transformers.feature_extraction_utils import BatchFeature +from transformers.processing_utils import ( + ProcessingKwargs, + ProcessorMixin, + Unpack, + VideosKwargs, +) +from transformers.tokenization_utils_base import PreTokenizedInput, TextInput + + +ImageInput = Union[ + "PIL.Image.Image", + np.ndarray, + "torch.Tensor", + List["PIL.Image.Image"], + List[np.ndarray], + List["torch.Tensor"], +] # noqa + + +VideoInput = Union[ + List["PIL.Image.Image"], + "np.ndarray", + "torch.Tensor", + List["np.ndarray"], + List["torch.Tensor"], + List[List["PIL.Image.Image"]], + List[List["np.ndarrray"]], + List[List["torch.Tensor"]], +] # noqa + + +class PaddleOCRVLVideosProcessorKwargs(VideosKwargs, total=False): + fps: Union[List[float], float] + + +class PaddleOCRVLProcessorKwargs(ProcessingKwargs, total=False): + videos_kwargs: PaddleOCRVLVideosProcessorKwargs + _defaults = { + "text_kwargs": { + "padding": False, + }, + "videos_kwargs": {"fps": 2.0}, + } + + +class PaddleOCRVLProcessor(ProcessorMixin): + r""" + [`PaddleOCRVLProcessor`] offers all the functionalities of [`SiglipImageProcessor`] and [`Qwen2TokenizerFast`]. See the + [`~PaddleOCRVLProcessor.__call__`] and [`~PaddleOCRVLProcessor.decode`] for more information. + Args: + image_processor ([`SiglipImageProcessor`], *optional*): + The image processor is a required input. + tokenizer ([`Qwen2TokenizerFast`], *optional*): + The tokenizer is a required input. + chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages + in a chat into a tokenizable string. 
+ """ + + attributes = ["image_processor", "tokenizer"] + valid_kwargs = [ + "chat_template", + "image_std", + "min_pixels", + "image_mean", + "merge_size", + "image_processor_type", + "temporal_patch_size", + "patch_size", + "max_pixels", + ] + + image_processor_class = "AutoImageProcessor" + tokenizer_class = "AutoTokenizer" + + def __init__( + self, image_processor=None, tokenizer=None, chat_template=None, **kwargs + ): + self.image_token = ( + "<|IMAGE_PLACEHOLDER|>" + if not hasattr(tokenizer, "image_token") + else tokenizer.image_token + ) + self.video_token = ( + "<|video_pad|>" + if not hasattr(tokenizer, "video_token") + else tokenizer.video_token + ) + super().__init__(image_processor, tokenizer, chat_template=chat_template) + + def __call__( + self, + images: ImageInput = None, + text: Union[ + TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput] + ] = None, + videos: VideoInput = None, + **kwargs: Unpack[PaddleOCRVLProcessorKwargs], + ) -> BatchFeature: + """ + Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text` + and `kwargs` arguments to Qwen2TokenizerFast's [`~Qwen2TokenizerFast.__call__`] if `text` is not `None` to encode + the text. To prepare the vision inputs, this method forwards the `vision_infos` and `kwrags` arguments to + SiglipImageProcessor's [`~SiglipImageProcessor.__call__`] if `vision_infos` is not `None`. + + Args: + images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`): + The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch + tensor. Both channels-first and channels-last formats are supported. + text (`str`, `List[str]`, `List[List[str]]`): + The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings + (pretokenized string). 
If the sequences are provided as list of strings (pretokenized), you must set + `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). + videos (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`): + The image or batch of videos to be prepared. Each video can be a 4D NumPy array or PyTorch + tensor, or a nested list of 3D frames. Both channels-first and channels-last formats are supported. + return_tensors (`str` or [`~utils.TensorType`], *optional*): + If set, will return tensors of a particular framework. Acceptable values are: + - `'tf'`: Return TensorFlow `tf.constant` objects. + - `'pt'`: Return PyTorch `torch.Tensor` objects. + - `'np'`: Return NumPy `np.ndarray` objects. + - `'jax'`: Return JAX `jnp.ndarray` objects. + + Returns: + [`BatchFeature`]: A [`BatchFeature`] with the following fields: + + - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. + - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when + `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not + `None`). + - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`. + - **pixel_values_videos** -- Pixel values of videos to be fed to a model. Returned when `videos` is not `None`. + - **image_grid_thw** -- List of image 3D grid in LLM. Returned when `images` is not `None`. + - **video_grid_thw** -- List of video 3D grid in LLM. Returned when `videos` is not `None`. + - **second_per_grid_ts** -- List of video seconds per time grid. Returned when `videos` is not `None`. 
+ """ + output_kwargs = self._merge_kwargs( + PaddleOCRVLProcessorKwargs, + tokenizer_init_kwargs=self.tokenizer.init_kwargs, + **kwargs, + ) + + if images is not None: + image_inputs = self.image_processor(images=images, return_tensors="pt") + image_inputs["pixel_values"] = image_inputs["pixel_values"] + image_grid_thw = image_inputs["image_grid_thw"] + + else: + image_inputs = {} + image_grid_thw = None + + if videos is not None: + # TODO: add video processing + videos_inputs = self.image_processor( + images=None, videos=videos, **output_kwargs["images_kwargs"] + ) + video_grid_thw = videos_inputs["video_grid_thw"] + + fps = output_kwargs["videos_kwargs"].pop("fps", 2.0) + if isinstance(fps, (int, float)): + second_per_grid_ts = [ + self.image_processor.temporal_patch_size / fps + ] * len(video_grid_thw) + elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw): + second_per_grid_ts = [ + self.image_processor.temporal_patch_size / tmp for tmp in fps + ] + else: + raise ValueError( + f"The length of fps ({len(fps) if hasattr(fps, '__len__') else fps}) must be equal to the length of video_grid_thw ({len(video_grid_thw)}) or fps should be a single number." 
+ ) + videos_inputs.update( + {"second_per_grid_ts": torch.tensor(second_per_grid_ts)} + ) + + else: + videos_inputs = {} + video_grid_thw = None + + if not isinstance(text, list): + text = [text] + + if image_grid_thw is not None: + index = 0 + for i in range(len(text)): + while self.image_token in text[i]: + text[i] = text[i].replace( + self.image_token, + "<|placeholder|>" + * ( + image_grid_thw[index].prod() + // self.image_processor.merge_size + // self.image_processor.merge_size + ), + 1, + ) + index += 1 + text[i] = text[i].replace("<|placeholder|>", self.image_token) + + if video_grid_thw is not None: + index = 0 + for i in range(len(text)): + while self.video_token in text[i]: + text[i] = text[i].replace( + self.video_token, + "<|placeholder|>" + * ( + video_grid_thw[index].prod() + // self.image_processor.merge_size + // self.image_processor.merge_size + ), + 1, + ) + index += 1 + text[i] = text[i].replace("<|placeholder|>", self.video_token) + + text_inputs = self.tokenizer(text, **output_kwargs["text_kwargs"]) + + return BatchFeature(data={**text_inputs, **image_inputs, **videos_inputs}) + + def batch_decode(self, *args, **kwargs): + """ + This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please + refer to the docstring of this method for more information. + """ + return self.tokenizer.batch_decode(*args, **kwargs) + + def decode(self, *args, **kwargs): + """ + This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to + the docstring of this method for more information. + """ + return self.tokenizer.decode(*args, **kwargs) + + def post_process_image_text_to_text( + self, + generated_outputs, + skip_special_tokens=True, + clean_up_tokenization_spaces=False, + **kwargs, + ): + """ + Post-process the output of the model to decode the text. + + Args: + generated_outputs (`torch.Tensor` or `np.ndarray`): + The output of the model `generate` function. 
The output is expected to be a tensor of shape `(batch_size, sequence_length)` + or `(sequence_length,)`. + skip_special_tokens (`bool`, *optional*, defaults to `True`): + Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method. + Clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`): + Whether or not to clean up the tokenization spaces. Argument passed to the tokenizer's `batch_decode` method. + **kwargs: + Additional arguments to be passed to the tokenizer's `batch_decode method`. + + Returns: + `List[str]`: The decoded text. + """ + return self.tokenizer.batch_decode( + generated_outputs, + skip_special_tokens=skip_special_tokens, + clean_up_tokenization_spaces=clean_up_tokenization_spaces, + **kwargs, + ) + + @property + def model_input_names(self): + tokenizer_input_names = self.tokenizer.model_input_names + image_processor_input_names = self.image_processor.model_input_names + names_from_processor = list( + dict.fromkeys(tokenizer_input_names + image_processor_input_names) + ) + return names_from_processor + ["second_per_grid_ts"] + + +__all__ = ["PaddleOCRVLProcessor", "PaddleOCRVLProcessor"] diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..033053ac4d8b5de2e47884ce85a6b4939cc58e87 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,6 @@ +{ + "auto_map": { + "AutoProcessor": "processing_paddleocr_vl.PaddleOCRVLProcessor" + }, + "processor_class": "PaddleOCRVLProcessor" +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c4aec2f502327373e3524be6ac37ff9070dec8b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,58 @@ +{ + "additional_special_tokens": [ + "<|IMAGE_PLACEHOLDER|>", + "<|image_pad|>", + "<|IMAGE_START|>", + "<|IMAGE_END|>", + "<|video_pad|>" + ], + "bos_token": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "<|begin_of_sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "<|end_of_sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..44c2e443564a0b79b6eb594bea84fabec088f722 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90f04fd8e5eb6dfa380f37d10c87392de8438dccb6768a2486b5a96ee76dba6 +size 11187679 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..81e33aa7fdb8ef76fceb1a69f1997021f3056770 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ef7db83df785924fb83d7b887b6e822a031c56e15cff40aaf9b982988180df +size 1614363 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..035d3e74cb48b3d1105af9d22c731137992f6910 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,8346 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "4": { + "content": "1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "4", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "5", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "6", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "7", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "8", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "9", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100272": { + "content": "<|end_of_sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100273": { + "content": "<|begin_of_sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100274": { + "content": 
"", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100277": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100278": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100279": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100280": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100295": { + "content": "<|IMAGE_PLACEHOLDER|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100296": { + "content": "<|AUDIO_PLACEHOLDER|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100297": { + "content": "<|LOC_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100298": { + "content": "<|LOC_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100299": { + "content": "<|LOC_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100300": { + "content": "<|LOC_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100301": { + "content": "<|LOC_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100302": { + "content": "<|LOC_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100303": { + "content": "<|LOC_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "100304": { + "content": "<|LOC_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100305": { + "content": "<|LOC_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100306": { + "content": "<|LOC_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100307": { + "content": "<|LOC_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100308": { + "content": "<|LOC_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100309": { + "content": "<|LOC_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100310": { + "content": "<|LOC_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100311": { + "content": "<|LOC_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100312": { + "content": "<|LOC_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100313": { + "content": "<|LOC_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100314": { + "content": "<|LOC_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100315": { + "content": "<|LOC_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100316": { + "content": "<|LOC_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100317": { + "content": 
"<|LOC_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100318": { + "content": "<|LOC_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100319": { + "content": "<|LOC_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100320": { + "content": "<|LOC_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100321": { + "content": "<|LOC_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100322": { + "content": "<|LOC_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100323": { + "content": "<|LOC_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100324": { + "content": "<|LOC_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100325": { + "content": "<|LOC_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100326": { + "content": "<|LOC_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100327": { + "content": "<|LOC_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100328": { + "content": "<|LOC_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100329": { + "content": "<|LOC_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100330": { + "content": "<|LOC_33|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "100331": { + "content": "<|LOC_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100332": { + "content": "<|LOC_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100333": { + "content": "<|LOC_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100334": { + "content": "<|LOC_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100335": { + "content": "<|LOC_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100336": { + "content": "<|LOC_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100337": { + "content": "<|LOC_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100338": { + "content": "<|LOC_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100339": { + "content": "<|LOC_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100340": { + "content": "<|LOC_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100341": { + "content": "<|LOC_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100342": { + "content": "<|LOC_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100343": { + "content": "<|LOC_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "100344": { + "content": "<|LOC_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100345": { + "content": "<|LOC_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100346": { + "content": "<|LOC_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100347": { + "content": "<|LOC_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100348": { + "content": "<|LOC_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100349": { + "content": "<|LOC_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100350": { + "content": "<|LOC_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100351": { + "content": "<|LOC_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100352": { + "content": "<|LOC_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100353": { + "content": "<|LOC_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100354": { + "content": "<|LOC_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100355": { + "content": "<|LOC_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100356": { + "content": "<|LOC_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100357": { + "content": "<|LOC_60|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100358": { + "content": "<|LOC_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100359": { + "content": "<|LOC_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100360": { + "content": "<|LOC_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100361": { + "content": "<|LOC_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100362": { + "content": "<|LOC_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100363": { + "content": "<|LOC_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100364": { + "content": "<|LOC_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100365": { + "content": "<|LOC_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100366": { + "content": "<|LOC_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100367": { + "content": "<|LOC_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100368": { + "content": "<|LOC_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100369": { + "content": "<|LOC_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100370": { + "content": "<|LOC_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "100371": { + "content": "<|LOC_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100372": { + "content": "<|LOC_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100373": { + "content": "<|LOC_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100374": { + "content": "<|LOC_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100375": { + "content": "<|LOC_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100376": { + "content": "<|LOC_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100377": { + "content": "<|LOC_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100378": { + "content": "<|LOC_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100379": { + "content": "<|LOC_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100380": { + "content": "<|LOC_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100381": { + "content": "<|LOC_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100382": { + "content": "<|LOC_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100383": { + "content": "<|LOC_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100384": { + "content": 
"<|LOC_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100385": { + "content": "<|LOC_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100386": { + "content": "<|LOC_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100387": { + "content": "<|LOC_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100388": { + "content": "<|LOC_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100389": { + "content": "<|LOC_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100390": { + "content": "<|LOC_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100391": { + "content": "<|LOC_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100392": { + "content": "<|LOC_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100393": { + "content": "<|LOC_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100394": { + "content": "<|LOC_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100395": { + "content": "<|LOC_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100396": { + "content": "<|LOC_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100397": { + "content": "<|LOC_100|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "100398": { + "content": "<|LOC_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100399": { + "content": "<|LOC_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100400": { + "content": "<|LOC_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100401": { + "content": "<|LOC_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100402": { + "content": "<|LOC_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100403": { + "content": "<|LOC_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100404": { + "content": "<|LOC_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100405": { + "content": "<|LOC_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100406": { + "content": "<|LOC_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100407": { + "content": "<|LOC_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100408": { + "content": "<|LOC_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100409": { + "content": "<|LOC_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100410": { + "content": "<|LOC_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "100411": { + "content": "<|LOC_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100412": { + "content": "<|LOC_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100413": { + "content": "<|LOC_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100414": { + "content": "<|LOC_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100415": { + "content": "<|LOC_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100416": { + "content": "<|LOC_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100417": { + "content": "<|LOC_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100418": { + "content": "<|LOC_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100419": { + "content": "<|LOC_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100420": { + "content": "<|LOC_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100421": { + "content": "<|LOC_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100422": { + "content": "<|LOC_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100423": { + "content": "<|LOC_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100424": { + "content": 
"<|LOC_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100425": { + "content": "<|LOC_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100426": { + "content": "<|LOC_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100427": { + "content": "<|LOC_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100428": { + "content": "<|LOC_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100429": { + "content": "<|LOC_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100430": { + "content": "<|LOC_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100431": { + "content": "<|LOC_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100432": { + "content": "<|LOC_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100433": { + "content": "<|LOC_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100434": { + "content": "<|LOC_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100435": { + "content": "<|LOC_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100436": { + "content": "<|LOC_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100437": { + "content": "<|LOC_140|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100438": { + "content": "<|LOC_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100439": { + "content": "<|LOC_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100440": { + "content": "<|LOC_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100441": { + "content": "<|LOC_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100442": { + "content": "<|LOC_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100443": { + "content": "<|LOC_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100444": { + "content": "<|LOC_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100445": { + "content": "<|LOC_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100446": { + "content": "<|LOC_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100447": { + "content": "<|LOC_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100448": { + "content": "<|LOC_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100449": { + "content": "<|LOC_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100450": { + "content": "<|LOC_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100451": { + "content": "<|LOC_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100452": { + "content": "<|LOC_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100453": { + "content": "<|LOC_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100454": { + "content": "<|LOC_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100455": { + "content": "<|LOC_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100456": { + "content": "<|LOC_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100457": { + "content": "<|LOC_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100458": { + "content": "<|LOC_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100459": { + "content": "<|LOC_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100460": { + "content": "<|LOC_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100461": { + "content": "<|LOC_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100462": { + "content": "<|LOC_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100463": { + "content": "<|LOC_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100464": { + "content": 
"<|LOC_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100465": { + "content": "<|LOC_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100466": { + "content": "<|LOC_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100467": { + "content": "<|LOC_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100468": { + "content": "<|LOC_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100469": { + "content": "<|LOC_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100470": { + "content": "<|LOC_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100471": { + "content": "<|LOC_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100472": { + "content": "<|LOC_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100473": { + "content": "<|LOC_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100474": { + "content": "<|LOC_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100475": { + "content": "<|LOC_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100476": { + "content": "<|LOC_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100477": { + "content": "<|LOC_180|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100478": { + "content": "<|LOC_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100479": { + "content": "<|LOC_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100480": { + "content": "<|LOC_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100481": { + "content": "<|LOC_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100482": { + "content": "<|LOC_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100483": { + "content": "<|LOC_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100484": { + "content": "<|LOC_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100485": { + "content": "<|LOC_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100486": { + "content": "<|LOC_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100487": { + "content": "<|LOC_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100488": { + "content": "<|LOC_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100489": { + "content": "<|LOC_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100490": { + "content": "<|LOC_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100491": { + "content": "<|LOC_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100492": { + "content": "<|LOC_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100493": { + "content": "<|LOC_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100494": { + "content": "<|LOC_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100495": { + "content": "<|LOC_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100496": { + "content": "<|LOC_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100497": { + "content": "<|LOC_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100498": { + "content": "<|LOC_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100499": { + "content": "<|LOC_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100500": { + "content": "<|LOC_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100501": { + "content": "<|LOC_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100502": { + "content": "<|LOC_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100503": { + "content": "<|LOC_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100504": { + "content": 
"<|LOC_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100505": { + "content": "<|LOC_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100506": { + "content": "<|LOC_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100507": { + "content": "<|LOC_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100508": { + "content": "<|LOC_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100509": { + "content": "<|LOC_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100510": { + "content": "<|LOC_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100511": { + "content": "<|LOC_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100512": { + "content": "<|LOC_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100513": { + "content": "<|LOC_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100514": { + "content": "<|LOC_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100515": { + "content": "<|LOC_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100516": { + "content": "<|LOC_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100517": { + "content": "<|LOC_220|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100518": { + "content": "<|LOC_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100519": { + "content": "<|LOC_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100520": { + "content": "<|LOC_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100521": { + "content": "<|LOC_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100522": { + "content": "<|LOC_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100523": { + "content": "<|LOC_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100524": { + "content": "<|LOC_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100525": { + "content": "<|LOC_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100526": { + "content": "<|LOC_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100527": { + "content": "<|LOC_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100528": { + "content": "<|LOC_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100529": { + "content": "<|LOC_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100530": { + "content": "<|LOC_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100531": { + "content": "<|LOC_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100532": { + "content": "<|LOC_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100533": { + "content": "<|LOC_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100534": { + "content": "<|LOC_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100535": { + "content": "<|LOC_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100536": { + "content": "<|LOC_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100537": { + "content": "<|LOC_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100538": { + "content": "<|LOC_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100539": { + "content": "<|LOC_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100540": { + "content": "<|LOC_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100541": { + "content": "<|LOC_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100542": { + "content": "<|LOC_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100543": { + "content": "<|LOC_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100544": { + "content": 
"<|LOC_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100545": { + "content": "<|LOC_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100546": { + "content": "<|LOC_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100547": { + "content": "<|LOC_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100548": { + "content": "<|LOC_251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100549": { + "content": "<|LOC_252|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100550": { + "content": "<|LOC_253|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100551": { + "content": "<|LOC_254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100552": { + "content": "<|LOC_255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100553": { + "content": "<|LOC_256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100554": { + "content": "<|LOC_257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100555": { + "content": "<|LOC_258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100556": { + "content": "<|LOC_259|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100557": { + "content": "<|LOC_260|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100558": { + "content": "<|LOC_261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100559": { + "content": "<|LOC_262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100560": { + "content": "<|LOC_263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100561": { + "content": "<|LOC_264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100562": { + "content": "<|LOC_265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100563": { + "content": "<|LOC_266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100564": { + "content": "<|LOC_267|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100565": { + "content": "<|LOC_268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100566": { + "content": "<|LOC_269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100567": { + "content": "<|LOC_270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100568": { + "content": "<|LOC_271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100569": { + "content": "<|LOC_272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100570": { + "content": "<|LOC_273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100571": { + "content": "<|LOC_274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100572": { + "content": "<|LOC_275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100573": { + "content": "<|LOC_276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100574": { + "content": "<|LOC_277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100575": { + "content": "<|LOC_278|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100576": { + "content": "<|LOC_279|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100577": { + "content": "<|LOC_280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100578": { + "content": "<|LOC_281|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100579": { + "content": "<|LOC_282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100580": { + "content": "<|LOC_283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100581": { + "content": "<|LOC_284|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100582": { + "content": "<|LOC_285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100583": { + "content": "<|LOC_286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100584": { + "content": 
"<|LOC_287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100585": { + "content": "<|LOC_288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100586": { + "content": "<|LOC_289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100587": { + "content": "<|LOC_290|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100588": { + "content": "<|LOC_291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100589": { + "content": "<|LOC_292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100590": { + "content": "<|LOC_293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100591": { + "content": "<|LOC_294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100592": { + "content": "<|LOC_295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100593": { + "content": "<|LOC_296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100594": { + "content": "<|LOC_297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100595": { + "content": "<|LOC_298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100596": { + "content": "<|LOC_299|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100597": { + "content": "<|LOC_300|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100598": { + "content": "<|LOC_301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100599": { + "content": "<|LOC_302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100600": { + "content": "<|LOC_303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100601": { + "content": "<|LOC_304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100602": { + "content": "<|LOC_305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100603": { + "content": "<|LOC_306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100604": { + "content": "<|LOC_307|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100605": { + "content": "<|LOC_308|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100606": { + "content": "<|LOC_309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100607": { + "content": "<|LOC_310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100608": { + "content": "<|LOC_311|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100609": { + "content": "<|LOC_312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100610": { + "content": "<|LOC_313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100611": { + "content": "<|LOC_314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100612": { + "content": "<|LOC_315|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100613": { + "content": "<|LOC_316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100614": { + "content": "<|LOC_317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100615": { + "content": "<|LOC_318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100616": { + "content": "<|LOC_319|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100617": { + "content": "<|LOC_320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100618": { + "content": "<|LOC_321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100619": { + "content": "<|LOC_322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100620": { + "content": "<|LOC_323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100621": { + "content": "<|LOC_324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100622": { + "content": "<|LOC_325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100623": { + "content": "<|LOC_326|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100624": { + "content": 
"<|LOC_327|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100625": { + "content": "<|LOC_328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100626": { + "content": "<|LOC_329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100627": { + "content": "<|LOC_330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100628": { + "content": "<|LOC_331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100629": { + "content": "<|LOC_332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100630": { + "content": "<|LOC_333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100631": { + "content": "<|LOC_334|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100632": { + "content": "<|LOC_335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100633": { + "content": "<|LOC_336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100634": { + "content": "<|LOC_337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100635": { + "content": "<|LOC_338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100636": { + "content": "<|LOC_339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100637": { + "content": "<|LOC_340|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100638": { + "content": "<|LOC_341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100639": { + "content": "<|LOC_342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100640": { + "content": "<|LOC_343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100641": { + "content": "<|LOC_344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100642": { + "content": "<|LOC_345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100643": { + "content": "<|LOC_346|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100644": { + "content": "<|LOC_347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100645": { + "content": "<|LOC_348|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100646": { + "content": "<|LOC_349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100647": { + "content": "<|LOC_350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100648": { + "content": "<|LOC_351|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100649": { + "content": "<|LOC_352|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100650": { + "content": "<|LOC_353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100651": { + "content": "<|LOC_354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100652": { + "content": "<|LOC_355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100653": { + "content": "<|LOC_356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100654": { + "content": "<|LOC_357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100655": { + "content": "<|LOC_358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100656": { + "content": "<|LOC_359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100657": { + "content": "<|LOC_360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100658": { + "content": "<|LOC_361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100659": { + "content": "<|LOC_362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100660": { + "content": "<|LOC_363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100661": { + "content": "<|LOC_364|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100662": { + "content": "<|LOC_365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100663": { + "content": "<|LOC_366|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100664": { + "content": 
"<|LOC_367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100665": { + "content": "<|LOC_368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100666": { + "content": "<|LOC_369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100667": { + "content": "<|LOC_370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100668": { + "content": "<|LOC_371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100669": { + "content": "<|LOC_372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100670": { + "content": "<|LOC_373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100671": { + "content": "<|LOC_374|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100672": { + "content": "<|LOC_375|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100673": { + "content": "<|LOC_376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100674": { + "content": "<|LOC_377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100675": { + "content": "<|LOC_378|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100676": { + "content": "<|LOC_379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100677": { + "content": "<|LOC_380|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100678": { + "content": "<|LOC_381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100679": { + "content": "<|LOC_382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100680": { + "content": "<|LOC_383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100681": { + "content": "<|LOC_384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100682": { + "content": "<|LOC_385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100683": { + "content": "<|LOC_386|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100684": { + "content": "<|LOC_387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100685": { + "content": "<|LOC_388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100686": { + "content": "<|LOC_389|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100687": { + "content": "<|LOC_390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100688": { + "content": "<|LOC_391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100689": { + "content": "<|LOC_392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100690": { + "content": "<|LOC_393|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100691": { + "content": "<|LOC_394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100692": { + "content": "<|LOC_395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100693": { + "content": "<|LOC_396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100694": { + "content": "<|LOC_397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100695": { + "content": "<|LOC_398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100696": { + "content": "<|LOC_399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100697": { + "content": "<|LOC_400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100698": { + "content": "<|LOC_401|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100699": { + "content": "<|LOC_402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100700": { + "content": "<|LOC_403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100701": { + "content": "<|LOC_404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100702": { + "content": "<|LOC_405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100703": { + "content": "<|LOC_406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100704": { + "content": 
"<|LOC_407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100705": { + "content": "<|LOC_408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100706": { + "content": "<|LOC_409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100707": { + "content": "<|LOC_410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100708": { + "content": "<|LOC_411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100709": { + "content": "<|LOC_412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100710": { + "content": "<|LOC_413|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100711": { + "content": "<|LOC_414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100712": { + "content": "<|LOC_415|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100713": { + "content": "<|LOC_416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100714": { + "content": "<|LOC_417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100715": { + "content": "<|LOC_418|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100716": { + "content": "<|LOC_419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100717": { + "content": "<|LOC_420|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100718": { + "content": "<|LOC_421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100719": { + "content": "<|LOC_422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100720": { + "content": "<|LOC_423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100721": { + "content": "<|LOC_424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100722": { + "content": "<|LOC_425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100723": { + "content": "<|LOC_426|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100724": { + "content": "<|LOC_427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100725": { + "content": "<|LOC_428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100726": { + "content": "<|LOC_429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100727": { + "content": "<|LOC_430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100728": { + "content": "<|LOC_431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100729": { + "content": "<|LOC_432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100730": { + "content": "<|LOC_433|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100731": { + "content": "<|LOC_434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100732": { + "content": "<|LOC_435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100733": { + "content": "<|LOC_436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100734": { + "content": "<|LOC_437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100735": { + "content": "<|LOC_438|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100736": { + "content": "<|LOC_439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100737": { + "content": "<|LOC_440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100738": { + "content": "<|LOC_441|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100739": { + "content": "<|LOC_442|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100740": { + "content": "<|LOC_443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100741": { + "content": "<|LOC_444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100742": { + "content": "<|LOC_445|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100743": { + "content": "<|LOC_446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100744": { + "content": 
"<|LOC_447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100745": { + "content": "<|LOC_448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100746": { + "content": "<|LOC_449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100747": { + "content": "<|LOC_450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100748": { + "content": "<|LOC_451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100749": { + "content": "<|LOC_452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100750": { + "content": "<|LOC_453|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100751": { + "content": "<|LOC_454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100752": { + "content": "<|LOC_455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100753": { + "content": "<|LOC_456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100754": { + "content": "<|LOC_457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100755": { + "content": "<|LOC_458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100756": { + "content": "<|LOC_459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100757": { + "content": "<|LOC_460|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100758": { + "content": "<|LOC_461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100759": { + "content": "<|LOC_462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100760": { + "content": "<|LOC_463|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100761": { + "content": "<|LOC_464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100762": { + "content": "<|LOC_465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100763": { + "content": "<|LOC_466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100764": { + "content": "<|LOC_467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100765": { + "content": "<|LOC_468|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100766": { + "content": "<|LOC_469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100767": { + "content": "<|LOC_470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100768": { + "content": "<|LOC_471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100769": { + "content": "<|LOC_472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100770": { + "content": "<|LOC_473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100771": { + "content": "<|LOC_474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100772": { + "content": "<|LOC_475|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100773": { + "content": "<|LOC_476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100774": { + "content": "<|LOC_477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100775": { + "content": "<|LOC_478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100776": { + "content": "<|LOC_479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100777": { + "content": "<|LOC_480|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100778": { + "content": "<|LOC_481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100779": { + "content": "<|LOC_482|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100780": { + "content": "<|LOC_483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100781": { + "content": "<|LOC_484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100782": { + "content": "<|LOC_485|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100783": { + "content": "<|LOC_486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100784": { + "content": 
"<|LOC_487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100785": { + "content": "<|LOC_488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100786": { + "content": "<|LOC_489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100787": { + "content": "<|LOC_490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100788": { + "content": "<|LOC_491|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100789": { + "content": "<|LOC_492|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100790": { + "content": "<|LOC_493|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100791": { + "content": "<|LOC_494|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100792": { + "content": "<|LOC_495|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100793": { + "content": "<|LOC_496|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100794": { + "content": "<|LOC_497|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100795": { + "content": "<|LOC_498|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100796": { + "content": "<|LOC_499|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100797": { + "content": "<|LOC_500|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100798": { + "content": "<|LOC_501|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100799": { + "content": "<|LOC_502|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100800": { + "content": "<|LOC_503|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100801": { + "content": "<|LOC_504|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100802": { + "content": "<|LOC_505|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100803": { + "content": "<|LOC_506|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100804": { + "content": "<|LOC_507|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100805": { + "content": "<|LOC_508|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100806": { + "content": "<|LOC_509|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100807": { + "content": "<|LOC_510|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100808": { + "content": "<|LOC_511|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100809": { + "content": "<|LOC_512|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100810": { + "content": "<|LOC_513|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100811": { + "content": "<|LOC_514|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100812": { + "content": "<|LOC_515|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100813": { + "content": "<|LOC_516|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100814": { + "content": "<|LOC_517|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100815": { + "content": "<|LOC_518|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100816": { + "content": "<|LOC_519|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100817": { + "content": "<|LOC_520|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100818": { + "content": "<|LOC_521|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100819": { + "content": "<|LOC_522|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100820": { + "content": "<|LOC_523|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100821": { + "content": "<|LOC_524|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100822": { + "content": "<|LOC_525|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100823": { + "content": "<|LOC_526|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100824": { + "content": 
"<|LOC_527|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100825": { + "content": "<|LOC_528|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100826": { + "content": "<|LOC_529|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100827": { + "content": "<|LOC_530|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100828": { + "content": "<|LOC_531|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100829": { + "content": "<|LOC_532|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100830": { + "content": "<|LOC_533|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100831": { + "content": "<|LOC_534|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100832": { + "content": "<|LOC_535|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100833": { + "content": "<|LOC_536|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100834": { + "content": "<|LOC_537|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100835": { + "content": "<|LOC_538|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100836": { + "content": "<|LOC_539|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100837": { + "content": "<|LOC_540|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100838": { + "content": "<|LOC_541|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100839": { + "content": "<|LOC_542|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100840": { + "content": "<|LOC_543|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100841": { + "content": "<|LOC_544|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100842": { + "content": "<|LOC_545|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100843": { + "content": "<|LOC_546|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100844": { + "content": "<|LOC_547|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100845": { + "content": "<|LOC_548|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100846": { + "content": "<|LOC_549|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100847": { + "content": "<|LOC_550|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100848": { + "content": "<|LOC_551|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100849": { + "content": "<|LOC_552|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100850": { + "content": "<|LOC_553|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100851": { + "content": "<|LOC_554|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100852": { + "content": "<|LOC_555|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100853": { + "content": "<|LOC_556|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100854": { + "content": "<|LOC_557|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100855": { + "content": "<|LOC_558|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100856": { + "content": "<|LOC_559|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100857": { + "content": "<|LOC_560|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100858": { + "content": "<|LOC_561|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100859": { + "content": "<|LOC_562|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100860": { + "content": "<|LOC_563|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100861": { + "content": "<|LOC_564|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100862": { + "content": "<|LOC_565|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100863": { + "content": "<|LOC_566|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100864": { + "content": 
"<|LOC_567|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100865": { + "content": "<|LOC_568|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100866": { + "content": "<|LOC_569|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100867": { + "content": "<|LOC_570|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100868": { + "content": "<|LOC_571|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100869": { + "content": "<|LOC_572|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100870": { + "content": "<|LOC_573|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100871": { + "content": "<|LOC_574|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100872": { + "content": "<|LOC_575|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100873": { + "content": "<|LOC_576|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100874": { + "content": "<|LOC_577|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100875": { + "content": "<|LOC_578|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100876": { + "content": "<|LOC_579|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100877": { + "content": "<|LOC_580|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100878": { + "content": "<|LOC_581|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100879": { + "content": "<|LOC_582|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100880": { + "content": "<|LOC_583|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100881": { + "content": "<|LOC_584|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100882": { + "content": "<|LOC_585|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100883": { + "content": "<|LOC_586|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100884": { + "content": "<|LOC_587|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100885": { + "content": "<|LOC_588|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100886": { + "content": "<|LOC_589|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100887": { + "content": "<|LOC_590|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100888": { + "content": "<|LOC_591|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100889": { + "content": "<|LOC_592|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100890": { + "content": "<|LOC_593|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100891": { + "content": "<|LOC_594|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100892": { + "content": "<|LOC_595|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100893": { + "content": "<|LOC_596|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100894": { + "content": "<|LOC_597|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100895": { + "content": "<|LOC_598|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100896": { + "content": "<|LOC_599|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100897": { + "content": "<|LOC_600|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100898": { + "content": "<|LOC_601|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100899": { + "content": "<|LOC_602|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100900": { + "content": "<|LOC_603|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100901": { + "content": "<|LOC_604|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100902": { + "content": "<|LOC_605|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100903": { + "content": "<|LOC_606|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100904": { + "content": 
"<|LOC_607|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100905": { + "content": "<|LOC_608|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100906": { + "content": "<|LOC_609|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100907": { + "content": "<|LOC_610|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100908": { + "content": "<|LOC_611|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100909": { + "content": "<|LOC_612|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100910": { + "content": "<|LOC_613|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100911": { + "content": "<|LOC_614|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100912": { + "content": "<|LOC_615|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100913": { + "content": "<|LOC_616|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100914": { + "content": "<|LOC_617|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100915": { + "content": "<|LOC_618|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100916": { + "content": "<|LOC_619|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100917": { + "content": "<|LOC_620|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100918": { + "content": "<|LOC_621|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100919": { + "content": "<|LOC_622|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100920": { + "content": "<|LOC_623|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100921": { + "content": "<|LOC_624|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100922": { + "content": "<|LOC_625|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100923": { + "content": "<|LOC_626|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100924": { + "content": "<|LOC_627|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100925": { + "content": "<|LOC_628|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100926": { + "content": "<|LOC_629|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100927": { + "content": "<|LOC_630|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100928": { + "content": "<|LOC_631|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100929": { + "content": "<|LOC_632|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100930": { + "content": "<|LOC_633|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100931": { + "content": "<|LOC_634|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100932": { + "content": "<|LOC_635|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100933": { + "content": "<|LOC_636|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100934": { + "content": "<|LOC_637|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100935": { + "content": "<|LOC_638|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100936": { + "content": "<|LOC_639|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100937": { + "content": "<|LOC_640|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100938": { + "content": "<|LOC_641|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100939": { + "content": "<|LOC_642|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100940": { + "content": "<|LOC_643|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100941": { + "content": "<|LOC_644|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100942": { + "content": "<|LOC_645|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100943": { + "content": "<|LOC_646|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100944": { + "content": 
"<|LOC_647|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100945": { + "content": "<|LOC_648|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100946": { + "content": "<|LOC_649|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100947": { + "content": "<|LOC_650|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100948": { + "content": "<|LOC_651|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100949": { + "content": "<|LOC_652|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100950": { + "content": "<|LOC_653|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100951": { + "content": "<|LOC_654|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100952": { + "content": "<|LOC_655|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100953": { + "content": "<|LOC_656|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100954": { + "content": "<|LOC_657|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100955": { + "content": "<|LOC_658|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100956": { + "content": "<|LOC_659|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100957": { + "content": "<|LOC_660|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100958": { + "content": "<|LOC_661|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100959": { + "content": "<|LOC_662|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100960": { + "content": "<|LOC_663|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100961": { + "content": "<|LOC_664|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100962": { + "content": "<|LOC_665|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100963": { + "content": "<|LOC_666|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100964": { + "content": "<|LOC_667|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100965": { + "content": "<|LOC_668|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100966": { + "content": "<|LOC_669|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100967": { + "content": "<|LOC_670|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100968": { + "content": "<|LOC_671|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100969": { + "content": "<|LOC_672|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100970": { + "content": "<|LOC_673|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "100971": { + "content": "<|LOC_674|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100972": { + "content": "<|LOC_675|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100973": { + "content": "<|LOC_676|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100974": { + "content": "<|LOC_677|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100975": { + "content": "<|LOC_678|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100976": { + "content": "<|LOC_679|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100977": { + "content": "<|LOC_680|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100978": { + "content": "<|LOC_681|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100979": { + "content": "<|LOC_682|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100980": { + "content": "<|LOC_683|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100981": { + "content": "<|LOC_684|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100982": { + "content": "<|LOC_685|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100983": { + "content": "<|LOC_686|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100984": { + "content": 
"<|LOC_687|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100985": { + "content": "<|LOC_688|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100986": { + "content": "<|LOC_689|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100987": { + "content": "<|LOC_690|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100988": { + "content": "<|LOC_691|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100989": { + "content": "<|LOC_692|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100990": { + "content": "<|LOC_693|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100991": { + "content": "<|LOC_694|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100992": { + "content": "<|LOC_695|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100993": { + "content": "<|LOC_696|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100994": { + "content": "<|LOC_697|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100995": { + "content": "<|LOC_698|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100996": { + "content": "<|LOC_699|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100997": { + "content": "<|LOC_700|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100998": { + "content": "<|LOC_701|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100999": { + "content": "<|LOC_702|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101000": { + "content": "<|LOC_703|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101001": { + "content": "<|LOC_704|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101002": { + "content": "<|LOC_705|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101003": { + "content": "<|LOC_706|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101004": { + "content": "<|LOC_707|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101005": { + "content": "<|LOC_708|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101006": { + "content": "<|LOC_709|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101007": { + "content": "<|LOC_710|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101008": { + "content": "<|LOC_711|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101009": { + "content": "<|LOC_712|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101010": { + "content": "<|LOC_713|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101011": { + "content": "<|LOC_714|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101012": { + "content": "<|LOC_715|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101013": { + "content": "<|LOC_716|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101014": { + "content": "<|LOC_717|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101015": { + "content": "<|LOC_718|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101016": { + "content": "<|LOC_719|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101017": { + "content": "<|LOC_720|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101018": { + "content": "<|LOC_721|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101019": { + "content": "<|LOC_722|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101020": { + "content": "<|LOC_723|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101021": { + "content": "<|LOC_724|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101022": { + "content": "<|LOC_725|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101023": { + "content": "<|LOC_726|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101024": { + "content": 
"<|LOC_727|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101025": { + "content": "<|LOC_728|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101026": { + "content": "<|LOC_729|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101027": { + "content": "<|LOC_730|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101028": { + "content": "<|LOC_731|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101029": { + "content": "<|LOC_732|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101030": { + "content": "<|LOC_733|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101031": { + "content": "<|LOC_734|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101032": { + "content": "<|LOC_735|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101033": { + "content": "<|LOC_736|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101034": { + "content": "<|LOC_737|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101035": { + "content": "<|LOC_738|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101036": { + "content": "<|LOC_739|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101037": { + "content": "<|LOC_740|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101038": { + "content": "<|LOC_741|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101039": { + "content": "<|LOC_742|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101040": { + "content": "<|LOC_743|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101041": { + "content": "<|LOC_744|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101042": { + "content": "<|LOC_745|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101043": { + "content": "<|LOC_746|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101044": { + "content": "<|LOC_747|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101045": { + "content": "<|LOC_748|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101046": { + "content": "<|LOC_749|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101047": { + "content": "<|LOC_750|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101048": { + "content": "<|LOC_751|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101049": { + "content": "<|LOC_752|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101050": { + "content": "<|LOC_753|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101051": { + "content": "<|LOC_754|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101052": { + "content": "<|LOC_755|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101053": { + "content": "<|LOC_756|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101054": { + "content": "<|LOC_757|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101055": { + "content": "<|LOC_758|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101056": { + "content": "<|LOC_759|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101057": { + "content": "<|LOC_760|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101058": { + "content": "<|LOC_761|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101059": { + "content": "<|LOC_762|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101060": { + "content": "<|LOC_763|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101061": { + "content": "<|LOC_764|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101062": { + "content": "<|LOC_765|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101063": { + "content": "<|LOC_766|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101064": { + "content": 
"<|LOC_767|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101065": { + "content": "<|LOC_768|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101066": { + "content": "<|LOC_769|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101067": { + "content": "<|LOC_770|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101068": { + "content": "<|LOC_771|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101069": { + "content": "<|LOC_772|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101070": { + "content": "<|LOC_773|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101071": { + "content": "<|LOC_774|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101072": { + "content": "<|LOC_775|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101073": { + "content": "<|LOC_776|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101074": { + "content": "<|LOC_777|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101075": { + "content": "<|LOC_778|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101076": { + "content": "<|LOC_779|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101077": { + "content": "<|LOC_780|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101078": { + "content": "<|LOC_781|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101079": { + "content": "<|LOC_782|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101080": { + "content": "<|LOC_783|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101081": { + "content": "<|LOC_784|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101082": { + "content": "<|LOC_785|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101083": { + "content": "<|LOC_786|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101084": { + "content": "<|LOC_787|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101085": { + "content": "<|LOC_788|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101086": { + "content": "<|LOC_789|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101087": { + "content": "<|LOC_790|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101088": { + "content": "<|LOC_791|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101089": { + "content": "<|LOC_792|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101090": { + "content": "<|LOC_793|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101091": { + "content": "<|LOC_794|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101092": { + "content": "<|LOC_795|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101093": { + "content": "<|LOC_796|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101094": { + "content": "<|LOC_797|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101095": { + "content": "<|LOC_798|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101096": { + "content": "<|LOC_799|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101097": { + "content": "<|LOC_800|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101098": { + "content": "<|LOC_801|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101099": { + "content": "<|LOC_802|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101100": { + "content": "<|LOC_803|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101101": { + "content": "<|LOC_804|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101102": { + "content": "<|LOC_805|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101103": { + "content": "<|LOC_806|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101104": { + "content": 
"<|LOC_807|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101105": { + "content": "<|LOC_808|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101106": { + "content": "<|LOC_809|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101107": { + "content": "<|LOC_810|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101108": { + "content": "<|LOC_811|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101109": { + "content": "<|LOC_812|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101110": { + "content": "<|LOC_813|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101111": { + "content": "<|LOC_814|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101112": { + "content": "<|LOC_815|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101113": { + "content": "<|LOC_816|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101114": { + "content": "<|LOC_817|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101115": { + "content": "<|LOC_818|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101116": { + "content": "<|LOC_819|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101117": { + "content": "<|LOC_820|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101118": { + "content": "<|LOC_821|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101119": { + "content": "<|LOC_822|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101120": { + "content": "<|LOC_823|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101121": { + "content": "<|LOC_824|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101122": { + "content": "<|LOC_825|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101123": { + "content": "<|LOC_826|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101124": { + "content": "<|LOC_827|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101125": { + "content": "<|LOC_828|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101126": { + "content": "<|LOC_829|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101127": { + "content": "<|LOC_830|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101128": { + "content": "<|LOC_831|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101129": { + "content": "<|LOC_832|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101130": { + "content": "<|LOC_833|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101131": { + "content": "<|LOC_834|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101132": { + "content": "<|LOC_835|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101133": { + "content": "<|LOC_836|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101134": { + "content": "<|LOC_837|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101135": { + "content": "<|LOC_838|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101136": { + "content": "<|LOC_839|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101137": { + "content": "<|LOC_840|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101138": { + "content": "<|LOC_841|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101139": { + "content": "<|LOC_842|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101140": { + "content": "<|LOC_843|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101141": { + "content": "<|LOC_844|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101142": { + "content": "<|LOC_845|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101143": { + "content": "<|LOC_846|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101144": { + "content": 
"<|LOC_847|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101145": { + "content": "<|LOC_848|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101146": { + "content": "<|LOC_849|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101147": { + "content": "<|LOC_850|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101148": { + "content": "<|LOC_851|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101149": { + "content": "<|LOC_852|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101150": { + "content": "<|LOC_853|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101151": { + "content": "<|LOC_854|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101152": { + "content": "<|LOC_855|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101153": { + "content": "<|LOC_856|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101154": { + "content": "<|LOC_857|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101155": { + "content": "<|LOC_858|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101156": { + "content": "<|LOC_859|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101157": { + "content": "<|LOC_860|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101158": { + "content": "<|LOC_861|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101159": { + "content": "<|LOC_862|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101160": { + "content": "<|LOC_863|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101161": { + "content": "<|LOC_864|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101162": { + "content": "<|LOC_865|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101163": { + "content": "<|LOC_866|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101164": { + "content": "<|LOC_867|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101165": { + "content": "<|LOC_868|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101166": { + "content": "<|LOC_869|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101167": { + "content": "<|LOC_870|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101168": { + "content": "<|LOC_871|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101169": { + "content": "<|LOC_872|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101170": { + "content": "<|LOC_873|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101171": { + "content": "<|LOC_874|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101172": { + "content": "<|LOC_875|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101173": { + "content": "<|LOC_876|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101174": { + "content": "<|LOC_877|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101175": { + "content": "<|LOC_878|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101176": { + "content": "<|LOC_879|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101177": { + "content": "<|LOC_880|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101178": { + "content": "<|LOC_881|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101179": { + "content": "<|LOC_882|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101180": { + "content": "<|LOC_883|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101181": { + "content": "<|LOC_884|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101182": { + "content": "<|LOC_885|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101183": { + "content": "<|LOC_886|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101184": { + "content": 
"<|LOC_887|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101185": { + "content": "<|LOC_888|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101186": { + "content": "<|LOC_889|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101187": { + "content": "<|LOC_890|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101188": { + "content": "<|LOC_891|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101189": { + "content": "<|LOC_892|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101190": { + "content": "<|LOC_893|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101191": { + "content": "<|LOC_894|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101192": { + "content": "<|LOC_895|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101193": { + "content": "<|LOC_896|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101194": { + "content": "<|LOC_897|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101195": { + "content": "<|LOC_898|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101196": { + "content": "<|LOC_899|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101197": { + "content": "<|LOC_900|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101198": { + "content": "<|LOC_901|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101199": { + "content": "<|LOC_902|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101200": { + "content": "<|LOC_903|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101201": { + "content": "<|LOC_904|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101202": { + "content": "<|LOC_905|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101203": { + "content": "<|LOC_906|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101204": { + "content": "<|LOC_907|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101205": { + "content": "<|LOC_908|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101206": { + "content": "<|LOC_909|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101207": { + "content": "<|LOC_910|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101208": { + "content": "<|LOC_911|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101209": { + "content": "<|LOC_912|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101210": { + "content": "<|LOC_913|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101211": { + "content": "<|LOC_914|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101212": { + "content": "<|LOC_915|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101213": { + "content": "<|LOC_916|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101214": { + "content": "<|LOC_917|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101215": { + "content": "<|LOC_918|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101216": { + "content": "<|LOC_919|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101217": { + "content": "<|LOC_920|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101218": { + "content": "<|LOC_921|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101219": { + "content": "<|LOC_922|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101220": { + "content": "<|LOC_923|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101221": { + "content": "<|LOC_924|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101222": { + "content": "<|LOC_925|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101223": { + "content": "<|LOC_926|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101224": { + "content": 
"<|LOC_927|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101225": { + "content": "<|LOC_928|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101226": { + "content": "<|LOC_929|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101227": { + "content": "<|LOC_930|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101228": { + "content": "<|LOC_931|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101229": { + "content": "<|LOC_932|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101230": { + "content": "<|LOC_933|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101231": { + "content": "<|LOC_934|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101232": { + "content": "<|LOC_935|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101233": { + "content": "<|LOC_936|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101234": { + "content": "<|LOC_937|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101235": { + "content": "<|LOC_938|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101236": { + "content": "<|LOC_939|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101237": { + "content": "<|LOC_940|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101238": { + "content": "<|LOC_941|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101239": { + "content": "<|LOC_942|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101240": { + "content": "<|LOC_943|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101241": { + "content": "<|LOC_944|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101242": { + "content": "<|LOC_945|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101243": { + "content": "<|LOC_946|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101244": { + "content": "<|LOC_947|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101245": { + "content": "<|LOC_948|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101246": { + "content": "<|LOC_949|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101247": { + "content": "<|LOC_950|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101248": { + "content": "<|LOC_951|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101249": { + "content": "<|LOC_952|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101250": { + "content": "<|LOC_953|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101251": { + "content": "<|LOC_954|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101252": { + "content": "<|LOC_955|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101253": { + "content": "<|LOC_956|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101254": { + "content": "<|LOC_957|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101255": { + "content": "<|LOC_958|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101256": { + "content": "<|LOC_959|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101257": { + "content": "<|LOC_960|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101258": { + "content": "<|LOC_961|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101259": { + "content": "<|LOC_962|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101260": { + "content": "<|LOC_963|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101261": { + "content": "<|LOC_964|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101262": { + "content": "<|LOC_965|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101263": { + "content": "<|LOC_966|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101264": { + "content": 
"<|LOC_967|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101265": { + "content": "<|LOC_968|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101266": { + "content": "<|LOC_969|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101267": { + "content": "<|LOC_970|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101268": { + "content": "<|LOC_971|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101269": { + "content": "<|LOC_972|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101270": { + "content": "<|LOC_973|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101271": { + "content": "<|LOC_974|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101272": { + "content": "<|LOC_975|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101273": { + "content": "<|LOC_976|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101274": { + "content": "<|LOC_977|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101275": { + "content": "<|LOC_978|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101276": { + "content": "<|LOC_979|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101277": { + "content": "<|LOC_980|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101278": { + "content": "<|LOC_981|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101279": { + "content": "<|LOC_982|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101280": { + "content": "<|LOC_983|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101281": { + "content": "<|LOC_984|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101282": { + "content": "<|LOC_985|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101283": { + "content": "<|LOC_986|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101284": { + "content": "<|LOC_987|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101285": { + "content": "<|LOC_988|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101286": { + "content": "<|LOC_989|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101287": { + "content": "<|LOC_990|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101288": { + "content": "<|LOC_991|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101289": { + "content": "<|LOC_992|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101290": { + "content": "<|LOC_993|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, 
+ "special": true + }, + "101291": { + "content": "<|LOC_994|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101292": { + "content": "<|LOC_995|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101293": { + "content": "<|LOC_996|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101294": { + "content": "<|LOC_997|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101295": { + "content": "<|LOC_998|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101296": { + "content": "<|LOC_999|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101297": { + "content": "<|LOC_1000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101298": { + "content": "<|LOC_BEGIN|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101299": { + "content": "<|LOC_END|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101300": { + "content": "<|LOC_SEP|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101301": { + "content": "<|CROP_COL_SEP|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101302": { + "content": "<|CROP_ROW_SEP|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101303": { + "content": "<|IMAGE_SEP|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101304": 
{ + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101305": { + "content": "<|IMAGE_START|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101306": { + "content": "<|IMAGE_END|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101307": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101308": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101309": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101310": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101311": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101312": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101313": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|IMAGE_PLACEHOLDER|>", + "<|image_pad|>", + "<|IMAGE_START|>", + "<|IMAGE_END|>", + "<|video_pad|>" + ], + "auto_map": { + "AutoProcessor": "processing_paddleocr_vl.PaddleOCRVLProcessor" + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "<|begin_of_sentence|>", + "eos_token": "", + "extra_special_tokens": {}, + "image_token": "<|IMAGE_PLACEHOLDER|>", + "legacy": true, + "mask_token": "", + "model_max_length": 131072, + "pad_token": "", + "processor_class": "PaddleOCRVLProcessor", + 
"sep_token": "<|end_of_sentence|>", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}