LlamaRoboticsTokenizer / tokenizer_config.json
kgrabko's picture
updated upload of JiRackTokenizer with improvement for Robotics and Routing tags
703ce8f verified
Raw
History Blame Contribute Delete
2.64 kB
{
  "backend": "tokenizers",
  "bos_token": "<|begin_of_text|>",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|end_of_text|>",
  "extra_special_tokens": [
    "<|image|>",
    "<|video|>",
    "<|sound|>",
    "<|voice|>",
    "<|listening|>",
    "<|vision|>",
    "<|mood_happy|>",
    "<|mood_sad|>",
    "<|mood_angry|>",
    "<|mood_neutral|>",
    "<fim_prefix>",
    "<fim_middle>",
    "<fim_suffix>",
    "<|action_start|>",
    "<|action_end|>",
    "<|trajectory_start|>",
    "<|trajectory_end|>",
    "<|joint_start|>",
    "<|joint_end|>",
    "<|sensor_start|>",
    "<|sensor_end|>",
    "<|command_start|>",
    "<|command_end|>",
    "<|state_start|>",
    "<|state_end|>",
    "<|pose|>",
    "<|velocity|>",
    "<|force|>",
    "<|torque|>",
    "<|gripper|>",
    "<|navigation|>",
    "<|obstacle|>",
    "<|task_start|>",
    "<|task_end|>",
    "<|plan_start|>",
    "<|plan_end|>",
    "<|behavior_start|>",
    "<|behavior_end|>",
    "<|skill_start|>",
    "<|skill_end|>",
    "<|motor|>",
    "<|servo|>",
    "<|imu|>",
    "<|lidar|>",
    "<|camera|>",
    "<|depth|>",
    "<|waypoint|>",
    "<|path|>",
    "<|collision|>",
    "<|grasp|>",
    "<|release|>",
    "<|homing|>",
    "<|emergency_stop|>",
    "<|calibration|>",
    "<|manipulation|>",
    "<|locomotion|>",
    "<|feedback|>",
    "<|control_loop|>",
    "__SCIENCE__",
    "__CODING__",
    "__STOCK_EXCHANGE__",
    "__MEDICINE__",
    "__GOVERNMENT__",
    "__NEWS__",
    "__GENERAL__",
    "__MATERIAL_SCIENCE__",
    "__ELECTRONICS__",
    "__MICROELECTRONICS__",
    "__ENGINEERING__",
    "__ROBOTICS__",
    "__ENERGY__",
    "__AUTOMOTIVE__",
    "__AVIATION__",
    "__MATH__",
    "__PYTHON__",
    "__C__",
    "__CPP__",
    "__C_SHARP__",
    "__JAVA__",
    "__JAVASCRIPT__",
    "__TYPESCRIPT__",
    "__RUST__",
    "__GO__",
    "__RUBY__",
    "__PHP__",
    "__SWIFT__",
    "__KOTLIN__",
    "__BASH__",
    "__SQL__",
    "__ASSEMBLY__",
    "__PHILOSOPHY__",
    "__LITERATURE__",
    "__SOCIOLOGY__",
    "__PSYCHOLOGY__",
    "__POLITICAL_SCIENCE__",
    "__CULTURAL_STUDIES__",
    "__ETHNOGRAPHY__",
    "__HUMAN_RIGHTS__",
    "__COMPLIANCE__",
    "__MILITARY__",
    "__BANKING__",
    "__OIL_INDUSTRY__",
    "__LIGHT_INDUSTRY__",
    "__NATURE__",
    "__OCEAN__",
    "__SPORT__",
    "__CULINARY__",
    "__TRAVEL__",
    "__HOBBY__"
  ],
  "is_local": true,
  "local_files_only": false,
  "model_input_names": ["input_ids", "attention_mask"],
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<|end_of_text|>",
  "tokenizer_class": "TokenizersBackend"
}