tejeshbhalla commited on
Commit
f307d1f
·
verified ·
1 Parent(s): 26ee45c

Training in progress, step 10

Browse files
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "/root/highspeedstorage/ft-volume/tool_call_model_v2",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
@@ -12,7 +12,7 @@
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
- "lora_alpha": 128,
16
  "lora_bias": false,
17
  "lora_dropout": 0.01,
18
  "megatron_config": null,
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "gate_proj",
27
- "v_proj",
28
- "up_proj",
29
  "o_proj",
30
- "q_proj",
31
- "k_proj",
32
- "down_proj"
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "/root/highspeedstorage/ft-volume/llama-base",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
  "loftq_config": {},
15
+ "lora_alpha": 256,
16
  "lora_bias": false,
17
  "lora_dropout": 0.01,
18
  "megatron_config": null,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "q_proj",
27
  "gate_proj",
 
 
28
  "o_proj",
29
+ "down_proj",
30
+ "up_proj",
31
+ "v_proj",
32
+ "k_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6128f839d981bdcaf57a6a571d99d9a7e1db74b3f6fb34d0f65afb523342ee36
3
  size 13254157312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f511a37d700aeb75f9a130794aa2ca48eababe7bf6871d22693baf45bc7ce3e
3
  size 13254157312
args.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "model": "/root/highspeedstorage/ft-volume/tool_call_model_v2",
3
  "model_type": "llama3_2",
4
  "model_revision": null,
5
  "task_type": "causal_lm",
@@ -11,7 +11,7 @@
11
  "local_repo_path": null,
12
  "template": "llama3_2",
13
  "system": "",
14
- "max_length": 15000,
15
  "truncation_strategy": "left",
16
  "max_pixels": null,
17
  "tools_prompt": "react_en",
@@ -22,7 +22,7 @@
22
  "use_chat_template": true,
23
  "template_backend": "swift",
24
  "dataset": [
25
- "dpo_data.jsonl"
26
  ],
27
  "val_dataset": [],
28
  "split_dataset_ratio": 0.03,
@@ -33,6 +33,7 @@
33
  "download_mode": "reuse_dataset_if_exists",
34
  "columns": {},
35
  "strict": false,
 
36
  "model_name": [
37
  null,
38
  null
@@ -50,7 +51,7 @@
50
  "bnb_4bit_use_double_quant": true,
51
  "bnb_4bit_quant_storage": null,
52
  "max_new_tokens": 64,
53
- "temperature": 0.7,
54
  "top_k": null,
55
  "top_p": null,
56
  "repetition_penalty": null,
@@ -59,7 +60,7 @@
59
  "stop_words": [],
60
  "logprobs": false,
61
  "top_logprobs": null,
62
- "ckpt_dir": "/root/highspeedstorage/ft-volume/tool_call_model_v2",
63
  "load_dataset_config": null,
64
  "lora_modules": [],
65
  "tuner_backend": "peft",
@@ -67,14 +68,14 @@
67
  "adapters": [],
68
  "seed": 42,
69
  "model_kwargs": {},
70
- "load_args": true,
71
  "load_data_args": false,
72
  "use_hf": true,
73
  "hub_token": null,
74
  "custom_register_path": [],
75
  "ignore_args_error": false,
76
  "use_swift_lora": false,
77
- "output_dir": "/root/dataDisk/output/v43-20250307-192741",
78
  "overwrite_output_dir": false,
79
  "do_train": false,
80
  "do_eval": false,
@@ -85,7 +86,7 @@
85
  "per_device_eval_batch_size": 1,
86
  "per_gpu_train_batch_size": null,
87
  "per_gpu_eval_batch_size": null,
88
- "gradient_accumulation_steps": 4,
89
  "eval_accumulation_steps": null,
90
  "eval_delay": 0,
91
  "torch_empty_cache_steps": null,
@@ -104,7 +105,7 @@
104
  "log_level": "passive",
105
  "log_level_replica": "warning",
106
  "log_on_each_node": true,
107
- "logging_dir": "/root/dataDisk/output/v43-20250307-192741/runs",
108
  "logging_strategy": "steps",
109
  "logging_first_step": true,
110
  "logging_steps": 1,
@@ -140,7 +141,6 @@
140
  "past_index": -1,
141
  "run_name": null,
142
  "disable_tqdm": null,
143
- "remove_unused_columns": false,
144
  "label_names": null,
145
  "load_best_model_at_end": false,
146
  "metric_for_best_model": "loss",
@@ -263,7 +263,7 @@
263
  "target_regex": null,
264
  "modules_to_save": [],
265
  "lora_rank": 512,
266
- "lora_alpha": 128,
267
  "lora_dropout": 0.01,
268
  "lora_bias": "none",
269
  "lora_dtype": null,
@@ -326,51 +326,29 @@
326
  "metric_warmup_step": 0,
327
  "fsdp_num": 1,
328
  "acc_steps": 1,
 
 
 
 
 
329
  "add_version": true,
330
  "resume_only_model": false,
331
  "check_model": true,
332
  "create_checkpoint_symlink": false,
333
  "packing": false,
334
  "lazy_tokenize": false,
335
- "loss_type": "sigmoid",
 
336
  "optimizer": null,
337
  "metric": null,
338
  "acc_strategy": "token",
339
- "reward_model": null,
340
- "reward_adapters": [],
341
- "reward_model_type": null,
342
- "reward_model_revision": null,
343
- "num_ppo_epochs": 4,
344
- "whiten_rewards": false,
345
- "kl_coef": 0.05,
346
- "cliprange": 0.2,
347
- "vf_coef": 0.1,
348
- "cliprange_value": 0.2,
349
- "gamma": 1.0,
350
- "lam": 0.95,
351
- "num_mini_batches": 1,
352
- "local_rollout_forward_batch_size": 64,
353
- "num_sample_generations": 10,
354
- "response_length": 512,
355
- "missing_eos_penalty": null,
356
- "rlhf_type": "dpo",
357
- "ref_model": null,
358
- "ref_model_type": null,
359
- "ref_model_revision": null,
360
- "beta": 0.1,
361
- "label_smoothing": 0,
362
- "rpo_alpha": 1.0,
363
- "cpo_alpha": 1.0,
364
- "simpo_gamma": 1,
365
- "desirable_weight": 1.0,
366
- "undesirable_weight": 1.0,
367
  "rank": 0,
368
  "global_world_size": 8,
369
  "local_world_size": 8,
370
- "model_suffix": "tool_call_model_v2",
371
- "model_info": "ModelInfo(model_type='llama3_2', model_dir='/root/highspeedstorage/ft-volume/tool_call_model_v2', torch_dtype=torch.bfloat16, max_model_len=128000, quant_method=None, quant_bits=None, config={'factor': 8.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}, task_type='causal_lm', num_labels=None)",
372
- "model_meta": "ModelMeta(model_type='llama3_2', model_groups=[ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.2-1B', hf_model_id='meta-llama/Llama-3.2-1B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B', hf_model_id='meta-llama/Llama-3.2-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-1B-Instruct', hf_model_id='meta-llama/Llama-3.2-1B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B-Instruct', hf_model_id='meta-llama/Llama-3.2-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.3-70B-Instruct', hf_model_id='meta-llama/Llama-3.3-70B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', hf_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='llama3_2', get_function=<function get_model_tokenizer_with_flash_attn at 0x7fe7ed990280>, model_arch='llama', architectures=['LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=['transformers>=4.45'], tags=[])",
373
- "model_dir": "/root/highspeedstorage/ft-volume/tool_call_model_v2",
374
  "hub": "<class 'swift.hub.hub.HFHub'>",
375
- "training_args": "DPOConfig(output_dir='/root/dataDisk/output/v43-20250307-192741', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=4, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=1e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, lr_scheduler_kwargs=None, warmup_ratio=0.1, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/root/dataDisk/output/v43-20250307-192741/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=10, save_total_limit=2, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=0, dataloader_prefetch_factor=None, past_index=-1, run_name='/root/dataDisk/output/v43-20250307-192741', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'cpu', 'pin_memory': True}, 'offload_param': {'device': 'cpu', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['wandb'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=True, resume_from_checkpoint=None, hub_model_id='TheAgenticAI/LLAMA-3.3-70B-Reasoning', hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=True, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs={'use_reentrant': True}, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, model_init_kwargs=None, ref_model_init_kwargs=None, model_adapter_name=None, ref_adapter_name=None, force_use_ref_model=False, disable_dropout=True, use_num_logits_to_keep=False, dataset_num_proc=1, padding_value=None, label_pad_token_id=None, max_prompt_length=512, max_completion_length=None, max_length=15000, truncation_mode='keep_end', padding_free=False, precompute_ref_log_probs=False, precompute_ref_batch_size=None, tools=None, loss_type='sigmoid', beta=0.1, f_divergence_type=<FDivergenceType.REVERSE_KL: 'reverse_kl'>, f_alpha_divergence_coef=1.0, reference_free=False, label_smoothing=0, use_weighting=False, rpo_alpha=1.0, discopop_tau=0.05, sync_ref_model=False, ref_model_mixup_alpha=0.9, ref_model_sync_steps=64, generate_during_eval=False, is_encoder_decoder=False, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)"
376
  }
 
1
  {
2
+ "model": "/root/highspeedstorage/ft-volume/llama-base",
3
  "model_type": "llama3_2",
4
  "model_revision": null,
5
  "task_type": "causal_lm",
 
11
  "local_repo_path": null,
12
  "template": "llama3_2",
13
  "system": "",
14
+ "max_length": 22000,
15
  "truncation_strategy": "left",
16
  "max_pixels": null,
17
  "tools_prompt": "react_en",
 
22
  "use_chat_template": true,
23
  "template_backend": "swift",
24
  "dataset": [
25
+ "new_data_new.jsonl"
26
  ],
27
  "val_dataset": [],
28
  "split_dataset_ratio": 0.03,
 
33
  "download_mode": "reuse_dataset_if_exists",
34
  "columns": {},
35
  "strict": false,
36
+ "remove_unused_columns": true,
37
  "model_name": [
38
  null,
39
  null
 
51
  "bnb_4bit_use_double_quant": true,
52
  "bnb_4bit_quant_storage": null,
53
  "max_new_tokens": 64,
54
+ "temperature": 0.0,
55
  "top_k": null,
56
  "top_p": null,
57
  "repetition_penalty": null,
 
60
  "stop_words": [],
61
  "logprobs": false,
62
  "top_logprobs": null,
63
+ "ckpt_dir": null,
64
  "load_dataset_config": null,
65
  "lora_modules": [],
66
  "tuner_backend": "peft",
 
68
  "adapters": [],
69
  "seed": 42,
70
  "model_kwargs": {},
71
+ "load_args": false,
72
  "load_data_args": false,
73
  "use_hf": true,
74
  "hub_token": null,
75
  "custom_register_path": [],
76
  "ignore_args_error": false,
77
  "use_swift_lora": false,
78
+ "output_dir": "/root/dataDisk/output/v53-20250310-062732",
79
  "overwrite_output_dir": false,
80
  "do_train": false,
81
  "do_eval": false,
 
86
  "per_device_eval_batch_size": 1,
87
  "per_gpu_train_batch_size": null,
88
  "per_gpu_eval_batch_size": null,
89
+ "gradient_accumulation_steps": 1,
90
  "eval_accumulation_steps": null,
91
  "eval_delay": 0,
92
  "torch_empty_cache_steps": null,
 
105
  "log_level": "passive",
106
  "log_level_replica": "warning",
107
  "log_on_each_node": true,
108
+ "logging_dir": "/root/dataDisk/output/v53-20250310-062732/runs",
109
  "logging_strategy": "steps",
110
  "logging_first_step": true,
111
  "logging_steps": 1,
 
141
  "past_index": -1,
142
  "run_name": null,
143
  "disable_tqdm": null,
 
144
  "label_names": null,
145
  "load_best_model_at_end": false,
146
  "metric_for_best_model": "loss",
 
263
  "target_regex": null,
264
  "modules_to_save": [],
265
  "lora_rank": 512,
266
+ "lora_alpha": 256,
267
  "lora_dropout": 0.01,
268
  "lora_bias": "none",
269
  "lora_dtype": null,
 
326
  "metric_warmup_step": 0,
327
  "fsdp_num": 1,
328
  "acc_steps": 1,
329
+ "swanlab_token": null,
330
+ "swanlab_project": null,
331
+ "swanlab_workspace": null,
332
+ "swanlab_exp_name": null,
333
+ "swanlab_mode": "cloud",
334
  "add_version": true,
335
  "resume_only_model": false,
336
  "check_model": true,
337
  "create_checkpoint_symlink": false,
338
  "packing": false,
339
  "lazy_tokenize": false,
340
+ "external_plugins": [],
341
+ "loss_type": null,
342
  "optimizer": null,
343
  "metric": null,
344
  "acc_strategy": "token",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  "rank": 0,
346
  "global_world_size": 8,
347
  "local_world_size": 8,
348
+ "model_suffix": "llama-base",
349
+ "model_info": "ModelInfo(model_type='llama3_2', model_dir='/root/highspeedstorage/ft-volume/llama-base', torch_dtype=torch.bfloat16, max_model_len=131072, quant_method=None, quant_bits=None, rope_scaling={'factor': 8.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}, config=None, task_type='causal_lm', num_labels=None)",
350
+ "model_meta": "ModelMeta(model_type='llama3_2', model_groups=[ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.2-1B', hf_model_id='meta-llama/Llama-3.2-1B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B', hf_model_id='meta-llama/Llama-3.2-3B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-1B-Instruct', hf_model_id='meta-llama/Llama-3.2-1B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='LLM-Research/Llama-3.2-3B-Instruct', hf_model_id='meta-llama/Llama-3.2-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='LLM-Research/Llama-3.3-70B-Instruct', hf_model_id='meta-llama/Llama-3.3-70B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', hf_model_id='unsloth/Llama-3.3-70B-Instruct-bnb-4bit', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='llama3_2', get_function=<function get_model_tokenizer_with_flash_attn at 0x7f8d0252f760>, model_arch='llama', architectures=['LlamaForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, task_type=None, ignore_patterns=[], requires=['transformers>=4.45'], tags=[])",
351
+ "model_dir": "/root/highspeedstorage/ft-volume/llama-base",
352
  "hub": "<class 'swift.hub.hub.HFHub'>",
353
+ "training_args": "Seq2SeqTrainingArguments(output_dir='/root/dataDisk/output/v53-20250310-062732', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=1e-05, weight_decay=0.01, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, lr_scheduler_kwargs=None, warmup_ratio=0.1, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/root/dataDisk/output/v53-20250310-062732/runs', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=True, logging_steps=1, logging_nan_inf_filter=True, save_strategy=<SaveStrategy.STEPS: 'steps'>, save_steps=10, save_total_limit=2, save_safetensors=True, save_on_each_node=False, save_only_model=False, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=10, dataloader_num_workers=0, dataloader_prefetch_factor=None, past_index=-1, run_name='/root/dataDisk/output/v53-20250310-062732', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'cpu', 'pin_memory': True}, 'offload_param': {'device': 'cpu', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=<OptimizerNames.ADAMW_TORCH: 'adamw_torch'>, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['wandb'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=True, resume_from_checkpoint=None, hub_model_id='TheAgenticAI/LLAMA-3.3-70B-Reasoning', hub_strategy=<HubStrategy.EVERY_SAVE: 'every_save'>, hub_token=None, hub_private_repo=True, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs={'use_reentrant': True}, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy='steps', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, acc_strategy='token', sequence_parallel_size=1, check_model=True, train_sampler_random=True, is_encoder_decoder=False, metric_warmup_step=0, train_dataset_sample=-1, fsdp_num=1, acc_steps=1, train_type='lora', optimizer=None, galore_config=None)"
354
  }
logging.jsonl CHANGED
@@ -1,92 +1,24 @@
1
- {"loss": 1.19787598, "grad_norm": 16.50406456, "learning_rate": 1.11e-06, "memory(GiB)": 80.38, "train_speed(iter/s)": 0.003465, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -944.0, "logps/rejected": -981.0, "logits/chosen": 0.11959839, "logits/rejected": 0.14052582, "nll_loss": 0.50634766, "epoch": 0.01215805, "global_step/max_steps": "1/82", "percentage": "1.22%", "elapsed_time": "4m 28s", "remaining_time": "6h 2m 29s"}
2
- {"loss": 1.19799805, "grad_norm": 17.04649734, "learning_rate": 2.22e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.003697, "rewards/chosen": 0.0, "rewards/rejected": 0.0, "rewards/accuracies": 0.0, "rewards/margins": 0.0, "logps/chosen": -600.0, "logps/rejected": -640.0, "logits/chosen": -0.0045166, "logits/rejected": 0.0723877, "nll_loss": 0.50634766, "epoch": 0.02431611, "global_step/max_steps": "2/82", "percentage": "2.44%", "elapsed_time": "8m 40s", "remaining_time": "5h 47m 14s"}
3
- {"loss": 1.16052246, "grad_norm": 16.47639656, "learning_rate": 3.33e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.00385, "rewards/chosen": -0.07815552, "rewards/rejected": -0.15332031, "rewards/accuracies": 0.4375, "rewards/margins": 0.07499695, "logps/chosen": -732.0, "logps/rejected": -711.5, "logits/chosen": 0.01654053, "logits/rejected": 0.04000854, "nll_loss": 0.49365234, "epoch": 0.03647416, "global_step/max_steps": "3/82", "percentage": "3.66%", "elapsed_time": "12m 39s", "remaining_time": "5h 33m 7s"}
4
- {"loss": 0.79382324, "grad_norm": 12.2022419, "learning_rate": 4.44e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.003969, "rewards/chosen": 0.16601562, "rewards/rejected": -0.94677734, "rewards/accuracies": 0.90625, "rewards/margins": 1.11328125, "logps/chosen": -659.0, "logps/rejected": -731.5, "logits/chosen": 0.03723907, "logits/rejected": 0.08538818, "nll_loss": 0.44628906, "epoch": 0.04863222, "global_step/max_steps": "4/82", "percentage": "4.88%", "elapsed_time": "16m 27s", "remaining_time": "5h 21m 2s"}
5
- {"loss": 0.8069458, "grad_norm": 5.88278675, "learning_rate": 5.56e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.004072, "rewards/chosen": -0.50585938, "rewards/rejected": -3.01367188, "rewards/accuracies": 0.90625, "rewards/margins": 2.51367188, "logps/chosen": -657.0, "logps/rejected": -648.5, "logits/chosen": 0.04119873, "logits/rejected": 0.06147385, "nll_loss": 0.54492188, "epoch": 0.06079027, "global_step/max_steps": "5/82", "percentage": "6.10%", "elapsed_time": "20m 7s", "remaining_time": "5h 9m 57s"}
6
- {"loss": 0.7019043, "grad_norm": 6.32576942, "learning_rate": 6.67e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.004001, "rewards/chosen": -0.51867676, "rewards/rejected": -5.21875, "rewards/accuracies": 0.90625, "rewards/margins": 4.69140625, "logps/chosen": -675.5, "logps/rejected": -768.0, "logits/chosen": 0.03408813, "logits/rejected": 0.15429688, "nll_loss": 0.53369141, "epoch": 0.07294833, "global_step/max_steps": "6/82", "percentage": "7.32%", "elapsed_time": "24m 39s", "remaining_time": "5h 12m 19s"}
7
- {"loss": 0.70428467, "grad_norm": 7.52466726, "learning_rate": 7.78e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.003951, "rewards/chosen": -1.19824219, "rewards/rejected": -9.5, "rewards/accuracies": 0.9375, "rewards/margins": 8.3046875, "logps/chosen": -1255.0, "logps/rejected": -1390.0, "logits/chosen": 0.17871094, "logits/rejected": 0.25048828, "nll_loss": 0.51855469, "epoch": 0.08510638, "global_step/max_steps": "7/82", "percentage": "8.54%", "elapsed_time": "29m 11s", "remaining_time": "5h 12m 46s"}
8
- {"loss": 1.31051636, "grad_norm": 6.65762138, "learning_rate": 8.89e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.004068, "rewards/chosen": -5.765625, "rewards/rejected": -11.40625, "rewards/accuracies": 0.875, "rewards/margins": 5.640625, "logps/chosen": -801.0, "logps/rejected": -806.0, "logits/chosen": 0.10290527, "logits/rejected": 0.13476562, "nll_loss": 0.51855469, "epoch": 0.09726444, "global_step/max_steps": "8/82", "percentage": "9.76%", "elapsed_time": "32m 26s", "remaining_time": "5h 0m 3s"}
9
- {"loss": 0.74472046, "grad_norm": 3.59503341, "learning_rate": 1e-05, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.004106, "rewards/chosen": -3.87109375, "rewards/rejected": -11.3125, "rewards/accuracies": 0.875, "rewards/margins": 7.453125, "logps/chosen": -866.0, "logps/rejected": -892.0, "logits/chosen": 0.15454102, "logits/rejected": 0.18896484, "nll_loss": 0.53222656, "epoch": 0.10942249, "global_step/max_steps": "9/82", "percentage": "10.98%", "elapsed_time": "36m 11s", "remaining_time": "4h 53m 34s"}
10
- {"loss": 0.89154053, "grad_norm": 7.31847811, "learning_rate": 9.86e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.004017, "rewards/chosen": -4.015625, "rewards/rejected": -12.34375, "rewards/accuracies": 0.84375, "rewards/margins": 8.3515625, "logps/chosen": -934.0, "logps/rejected": -996.0, "logits/chosen": 0.2109375, "logits/rejected": 0.24267578, "nll_loss": 0.52539062, "epoch": 0.12158055, "global_step/max_steps": "10/82", "percentage": "12.20%", "elapsed_time": "41m 9s", "remaining_time": "4h 56m 20s"}
11
- {"eval_loss": 0.74573207, "eval_runtime": 285.3241, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.039, "eval_rewards/chosen": -4.13938189, "eval_rewards/rejected": -14.715909, "eval_rewards/accuracies": 0.90909094, "eval_rewards/margins": 10.57102299, "eval_logps/chosen": -832.90911865, "eval_logps/rejected": -964.0, "eval_logits/chosen": 0.12888406, "eval_logits/rejected": 0.22145773, "eval_nll_loss": 0.51047587, "epoch": 0.12158055, "global_step/max_steps": "10/82", "percentage": "12.20%", "elapsed_time": "45m 54s", "remaining_time": "5h 30m 34s"}
12
- {"loss": 0.72991943, "grad_norm": 4.70781326, "learning_rate": 9.73e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.003602, "rewards/chosen": -3.80273438, "rewards/rejected": -13.15625, "rewards/accuracies": 0.875, "rewards/margins": 9.328125, "logps/chosen": -651.5, "logps/rejected": -762.0, "logits/chosen": 0.11218262, "logits/rejected": 0.17333984, "nll_loss": 0.50683594, "epoch": 0.1337386, "global_step/max_steps": "11/82", "percentage": "13.41%", "elapsed_time": "50m 34s", "remaining_time": "5h 26m 23s"}
13
- {"loss": 1.034729, "grad_norm": 8.77729416, "learning_rate": 9.59e-06, "memory(GiB)": 133.32, "train_speed(iter/s)": 0.003644, "rewards/chosen": -3.68554688, "rewards/rejected": -12.390625, "rewards/accuracies": 0.8125, "rewards/margins": 8.68359375, "logps/chosen": -844.0, "logps/rejected": -864.0, "logits/chosen": 0.23168945, "logits/rejected": 0.24194336, "nll_loss": 0.54931641, "epoch": 0.14589666, "global_step/max_steps": "12/82", "percentage": "14.63%", "elapsed_time": "54m 33s", "remaining_time": "5h 18m 14s"}
14
- {"loss": 0.58480835, "grad_norm": 5.16248894, "learning_rate": 9.45e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003653, "rewards/chosen": -0.49804688, "rewards/rejected": -12.9375, "rewards/accuracies": 0.96875, "rewards/margins": 12.453125, "logps/chosen": -1011.0, "logps/rejected": -1295.0, "logits/chosen": 0.20397949, "logits/rejected": 0.31152344, "nll_loss": 0.48876953, "epoch": 0.15805471, "global_step/max_steps": "13/82", "percentage": "15.85%", "elapsed_time": "58m 58s", "remaining_time": "5h 13m 3s"}
15
- {"loss": 0.90736389, "grad_norm": 8.01303196, "learning_rate": 9.32e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003722, "rewards/chosen": -2.93359375, "rewards/rejected": -10.5546875, "rewards/accuracies": 0.875, "rewards/margins": 7.609375, "logps/chosen": -687.0, "logps/rejected": -767.0, "logits/chosen": 0.16589355, "logits/rejected": 0.22924805, "nll_loss": 0.51953125, "epoch": 0.17021277, "global_step/max_steps": "14/82", "percentage": "17.07%", "elapsed_time": "1h 2m 21s", "remaining_time": "5h 2m 51s"}
16
- {"loss": 0.4855957, "grad_norm": 1.1846683, "learning_rate": 9.18e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003773, "rewards/chosen": -0.03491211, "rewards/rejected": -10.84375, "rewards/accuracies": 1.0, "rewards/margins": 10.8125, "logps/chosen": -714.0, "logps/rejected": -850.0, "logits/chosen": 0.14648438, "logits/rejected": 0.23071289, "nll_loss": 0.46923828, "epoch": 0.18237082, "global_step/max_steps": "15/82", "percentage": "18.29%", "elapsed_time": "1h 5m 56s", "remaining_time": "4h 54m 30s"}
17
- {"loss": 0.5256958, "grad_norm": 3.70943999, "learning_rate": 9.04e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003775, "rewards/chosen": -0.1887207, "rewards/rejected": -9.96875, "rewards/accuracies": 0.96875, "rewards/margins": 9.734375, "logps/chosen": -771.0, "logps/rejected": -914.0, "logits/chosen": 0.22906494, "logits/rejected": 0.29882812, "nll_loss": 0.49267578, "epoch": 0.19452888, "global_step/max_steps": "16/82", "percentage": "19.51%", "elapsed_time": "1h 10m 18s", "remaining_time": "4h 50m 1s"}
18
- {"loss": 0.65368652, "grad_norm": 5.11949921, "learning_rate": 8.9e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003783, "rewards/chosen": 1.3125, "rewards/rejected": -7.734375, "rewards/accuracies": 0.90625, "rewards/margins": 9.0546875, "logps/chosen": -1055.0, "logps/rejected": -1124.0, "logits/chosen": 0.1787262, "logits/rejected": 0.21850586, "nll_loss": 0.51269531, "epoch": 0.20668693, "global_step/max_steps": "17/82", "percentage": "20.73%", "elapsed_time": "1h 14m 33s", "remaining_time": "4h 45m 4s"}
19
- {"loss": 0.57885742, "grad_norm": 3.5943768, "learning_rate": 8.77e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003834, "rewards/chosen": -0.43359375, "rewards/rejected": -7.6328125, "rewards/accuracies": 0.90625, "rewards/margins": 7.1796875, "logps/chosen": -686.5, "logps/rejected": -718.5, "logits/chosen": 0.18432617, "logits/rejected": 0.21826172, "nll_loss": 0.45703125, "epoch": 0.21884498, "global_step/max_steps": "18/82", "percentage": "21.95%", "elapsed_time": "1h 17m 54s", "remaining_time": "4h 37m 0s"}
20
- {"loss": 0.65594482, "grad_norm": 5.29098272, "learning_rate": 8.63e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003853, "rewards/chosen": 0.14794922, "rewards/rejected": -8.046875, "rewards/accuracies": 0.90625, "rewards/margins": 8.1875, "logps/chosen": -798.0, "logps/rejected": -876.0, "logits/chosen": 0.20483398, "logits/rejected": 0.23950195, "nll_loss": 0.48925781, "epoch": 0.23100304, "global_step/max_steps": "19/82", "percentage": "23.17%", "elapsed_time": "1h 21m 51s", "remaining_time": "4h 31m 24s"}
21
- {"loss": 0.5333252, "grad_norm": 2.12160182, "learning_rate": 8.49e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003848, "rewards/chosen": 0.51953125, "rewards/rejected": -8.4140625, "rewards/accuracies": 0.96875, "rewards/margins": 8.9296875, "logps/chosen": -746.0, "logps/rejected": -887.0, "logits/chosen": 0.18365479, "logits/rejected": 0.26000977, "nll_loss": 0.47265625, "epoch": 0.24316109, "global_step/max_steps": "20/82", "percentage": "24.39%", "elapsed_time": "1h 26m 16s", "remaining_time": "4h 27m 27s"}
22
- {"eval_loss": 0.55660444, "eval_runtime": 284.7419, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 1.36292613, "eval_rewards/rejected": -8.43323898, "eval_rewards/accuracies": 0.95454544, "eval_rewards/margins": 9.80823898, "eval_logps/chosen": -777.90911865, "eval_logps/rejected": -901.45452881, "eval_logits/chosen": 0.12178178, "eval_logits/rejected": 0.21435547, "eval_nll_loss": 0.46129262, "epoch": 0.24316109, "global_step/max_steps": "20/82", "percentage": "24.39%", "elapsed_time": "1h 31m 1s", "remaining_time": "4h 42m 10s"}
23
- {"loss": 0.47488403, "grad_norm": 1.36670411, "learning_rate": 8.36e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003652, "rewards/chosen": 1.59960938, "rewards/rejected": -6.9609375, "rewards/accuracies": 1.0, "rewards/margins": 8.5625, "logps/chosen": -760.0, "logps/rejected": -919.0, "logits/chosen": 0.18579102, "logits/rejected": 0.26098633, "nll_loss": 0.44873047, "epoch": 0.25531915, "global_step/max_steps": "21/82", "percentage": "25.61%", "elapsed_time": "1h 35m 30s", "remaining_time": "4h 37m 26s"}
24
- {"loss": 0.54403687, "grad_norm": 2.88501763, "learning_rate": 8.22e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003658, "rewards/chosen": 1.99414062, "rewards/rejected": -5.16796875, "rewards/accuracies": 0.96875, "rewards/margins": 7.171875, "logps/chosen": -722.0, "logps/rejected": -781.0, "logits/chosen": 0.16625977, "logits/rejected": 0.2097168, "nll_loss": 0.47998047, "epoch": 0.2674772, "global_step/max_steps": "22/82", "percentage": "26.83%", "elapsed_time": "1h 39m 53s", "remaining_time": "4h 32m 25s"}
25
- {"loss": 0.4927063, "grad_norm": 1.41164339, "learning_rate": 8.08e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003661, "rewards/chosen": 3.55126953, "rewards/rejected": -5.53515625, "rewards/accuracies": 1.0, "rewards/margins": 9.09375, "logps/chosen": -862.0, "logps/rejected": -889.0, "logits/chosen": 0.17346191, "logits/rejected": 0.18591309, "nll_loss": 0.48046875, "epoch": 0.27963526, "global_step/max_steps": "23/82", "percentage": "28.05%", "elapsed_time": "1h 44m 22s", "remaining_time": "4h 27m 43s"}
26
- {"loss": 0.60314941, "grad_norm": 4.07823467, "learning_rate": 7.95e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003659, "rewards/chosen": 2.88476562, "rewards/rejected": -3.7265625, "rewards/accuracies": 0.9375, "rewards/margins": 6.609375, "logps/chosen": -962.0, "logps/rejected": -1057.0, "logits/chosen": 0.1776123, "logits/rejected": 0.22363281, "nll_loss": 0.48681641, "epoch": 0.29179331, "global_step/max_steps": "24/82", "percentage": "29.27%", "elapsed_time": "1h 48m 59s", "remaining_time": "4h 23m 22s"}
27
- {"loss": 0.54199219, "grad_norm": 4.04096413, "learning_rate": 7.81e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003662, "rewards/chosen": 2.37182617, "rewards/rejected": -5.65625, "rewards/accuracies": 0.96875, "rewards/margins": 8.03125, "logps/chosen": -639.0, "logps/rejected": -739.0, "logits/chosen": 0.21166992, "logits/rejected": 0.23413086, "nll_loss": 0.45849609, "epoch": 0.30395137, "global_step/max_steps": "25/82", "percentage": "30.49%", "elapsed_time": "1h 53m 26s", "remaining_time": "4h 18m 38s"}
28
- {"loss": 0.61772156, "grad_norm": 3.02750874, "learning_rate": 7.67e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003661, "rewards/chosen": 1.60644531, "rewards/rejected": -5.8046875, "rewards/accuracies": 0.96875, "rewards/margins": 7.3984375, "logps/chosen": -646.0, "logps/rejected": -738.0, "logits/chosen": 0.12518311, "logits/rejected": 0.18804932, "nll_loss": 0.46630859, "epoch": 0.31610942, "global_step/max_steps": "26/82", "percentage": "31.71%", "elapsed_time": "1h 58m 1s", "remaining_time": "4h 14m 12s"}
29
- {"loss": 0.48553467, "grad_norm": 0.30965665, "learning_rate": 7.53e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003651, "rewards/chosen": 4.29394531, "rewards/rejected": -5.21044922, "rewards/accuracies": 1.0, "rewards/margins": 9.4921875, "logps/chosen": -705.0, "logps/rejected": -742.0, "logits/chosen": 0.097229, "logits/rejected": 0.13226318, "nll_loss": 0.48046875, "epoch": 0.32826748, "global_step/max_steps": "27/82", "percentage": "32.93%", "elapsed_time": "2h 2m 55s", "remaining_time": "4h 10m 23s"}
30
- {"loss": 0.4508667, "grad_norm": 0.30843318, "learning_rate": 7.4e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003667, "rewards/chosen": 4.0390625, "rewards/rejected": -4.56640625, "rewards/accuracies": 1.0, "rewards/margins": 8.59375, "logps/chosen": -807.0, "logps/rejected": -855.0, "logits/chosen": 0.18164062, "logits/rejected": 0.20336914, "nll_loss": 0.4453125, "epoch": 0.34042553, "global_step/max_steps": "28/82", "percentage": "34.15%", "elapsed_time": "2h 6m 55s", "remaining_time": "4h 4m 47s"}
31
- {"loss": 0.45654297, "grad_norm": 0.65378851, "learning_rate": 7.26e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003658, "rewards/chosen": 4.63671875, "rewards/rejected": -4.25, "rewards/accuracies": 1.0, "rewards/margins": 8.8828125, "logps/chosen": -670.0, "logps/rejected": -773.0, "logits/chosen": 0.08782959, "logits/rejected": 0.1570282, "nll_loss": 0.44970703, "epoch": 0.35258359, "global_step/max_steps": "29/82", "percentage": "35.37%", "elapsed_time": "2h 11m 47s", "remaining_time": "4h 0m 50s"}
32
- {"loss": 0.52459717, "grad_norm": 2.73000193, "learning_rate": 7.12e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003676, "rewards/chosen": 3.609375, "rewards/rejected": -4.16015625, "rewards/accuracies": 0.96875, "rewards/margins": 7.7578125, "logps/chosen": -703.5, "logps/rejected": -729.0, "logits/chosen": 0.09490967, "logits/rejected": 0.13220215, "nll_loss": 0.45361328, "epoch": 0.36474164, "global_step/max_steps": "30/82", "percentage": "36.59%", "elapsed_time": "2h 15m 40s", "remaining_time": "3h 55m 9s"}
33
- {"eval_loss": 0.5146243, "eval_runtime": 284.6021, "eval_samples_per_second": 0.285, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 4.67151976, "eval_rewards/rejected": -5.63991499, "eval_rewards/accuracies": 0.96590906, "eval_rewards/margins": 10.30681801, "eval_logps/chosen": -745.27270508, "eval_logps/rejected": -873.81817627, "eval_logits/chosen": 0.07627175, "eval_logits/rejected": 0.16787997, "eval_nll_loss": 0.43874291, "epoch": 0.36474164, "global_step/max_steps": "30/82", "percentage": "36.59%", "elapsed_time": "2h 20m 24s", "remaining_time": "4h 3m 23s"}
34
- {"loss": 0.52911377, "grad_norm": 4.15321207, "learning_rate": 6.99e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003525, "rewards/chosen": 5.03515625, "rewards/rejected": -3.30078125, "rewards/accuracies": 0.9375, "rewards/margins": 8.3359375, "logps/chosen": -822.0, "logps/rejected": -903.0, "logits/chosen": 0.15161133, "logits/rejected": 0.18579102, "nll_loss": 0.45556641, "epoch": 0.3768997, "global_step/max_steps": "31/82", "percentage": "37.80%", "elapsed_time": "2h 26m 13s", "remaining_time": "4h 0m 33s"}
35
- {"loss": 0.48426819, "grad_norm": 0.98345941, "learning_rate": 6.85e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003538, "rewards/chosen": 5.26171875, "rewards/rejected": -4.13867188, "rewards/accuracies": 1.0, "rewards/margins": 9.390625, "logps/chosen": -605.0, "logps/rejected": -756.5, "logits/chosen": 0.07507324, "logits/rejected": 0.17346191, "nll_loss": 0.46533203, "epoch": 0.38905775, "global_step/max_steps": "32/82", "percentage": "39.02%", "elapsed_time": "2h 30m 24s", "remaining_time": "3h 55m 1s"}
36
- {"loss": 0.4743042, "grad_norm": 0.98263085, "learning_rate": 6.71e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003548, "rewards/chosen": 5.55859375, "rewards/rejected": -3.04785156, "rewards/accuracies": 1.0, "rewards/margins": 8.6171875, "logps/chosen": -720.5, "logps/rejected": -763.0, "logits/chosen": 0.09216309, "logits/rejected": 0.12890625, "nll_loss": 0.44873047, "epoch": 0.40121581, "global_step/max_steps": "33/82", "percentage": "40.24%", "elapsed_time": "2h 34m 39s", "remaining_time": "3h 49m 39s"}
37
- {"loss": 0.76196289, "grad_norm": 5.01305246, "learning_rate": 6.58e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003554, "rewards/chosen": 6.125, "rewards/rejected": -1.93945312, "rewards/accuracies": 0.90625, "rewards/margins": 8.0625, "logps/chosen": -750.5, "logps/rejected": -848.0, "logits/chosen": 0.09442139, "logits/rejected": 0.13879395, "nll_loss": 0.45410156, "epoch": 0.41337386, "global_step/max_steps": "34/82", "percentage": "41.46%", "elapsed_time": "2h 39m 5s", "remaining_time": "3h 44m 35s"}
38
- {"loss": 0.51202393, "grad_norm": 3.85906219, "learning_rate": 6.44e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003589, "rewards/chosen": 3.49414062, "rewards/rejected": -3.91894531, "rewards/accuracies": 0.96875, "rewards/margins": 7.421875, "logps/chosen": -748.0, "logps/rejected": -833.0, "logits/chosen": 0.14282227, "logits/rejected": 0.21447754, "nll_loss": 0.46337891, "epoch": 0.42553191, "global_step/max_steps": "35/82", "percentage": "42.68%", "elapsed_time": "2h 42m 12s", "remaining_time": "3h 37m 49s"}
39
- {"loss": 0.6362915, "grad_norm": 4.13025427, "learning_rate": 6.3e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003603, "rewards/chosen": 4.91796875, "rewards/rejected": -4.125, "rewards/accuracies": 0.9375, "rewards/margins": 9.0390625, "logps/chosen": -690.0, "logps/rejected": -751.5, "logits/chosen": 0.07999802, "logits/rejected": 0.12402344, "nll_loss": 0.42333984, "epoch": 0.43768997, "global_step/max_steps": "36/82", "percentage": "43.90%", "elapsed_time": "2h 46m 12s", "remaining_time": "3h 32m 23s"}
40
- {"loss": 0.45474243, "grad_norm": 1.02976191, "learning_rate": 6.16e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003602, "rewards/chosen": 7.36523438, "rewards/rejected": -2.82128906, "rewards/accuracies": 1.0, "rewards/margins": 10.1875, "logps/chosen": -786.5, "logps/rejected": -914.5, "logits/chosen": 0.09468079, "logits/rejected": 0.16748047, "nll_loss": 0.44042969, "epoch": 0.44984802, "global_step/max_steps": "37/82", "percentage": "45.12%", "elapsed_time": "2h 50m 51s", "remaining_time": "3h 27m 47s"}
41
- {"loss": 0.4602356, "grad_norm": 2.15465045, "learning_rate": 6.03e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003601, "rewards/chosen": 5.40820312, "rewards/rejected": -2.81640625, "rewards/accuracies": 0.96875, "rewards/margins": 8.2109375, "logps/chosen": -872.5, "logps/rejected": -944.0, "logits/chosen": 0.13708496, "logits/rejected": 0.18884277, "nll_loss": 0.42431641, "epoch": 0.46200608, "global_step/max_steps": "38/82", "percentage": "46.34%", "elapsed_time": "2h 55m 32s", "remaining_time": "3h 23m 15s"}
42
- {"loss": 0.48181152, "grad_norm": 1.20502436, "learning_rate": 5.89e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003611, "rewards/chosen": 7.46875, "rewards/rejected": -0.81445312, "rewards/accuracies": 1.0, "rewards/margins": 8.2734375, "logps/chosen": -785.0, "logps/rejected": -864.0, "logits/chosen": 0.09637451, "logits/rejected": 0.1463623, "nll_loss": 0.45263672, "epoch": 0.47416413, "global_step/max_steps": "39/82", "percentage": "47.56%", "elapsed_time": "2h 59m 40s", "remaining_time": "3h 18m 5s"}
43
- {"loss": 0.54425049, "grad_norm": 2.70972872, "learning_rate": 5.75e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003609, "rewards/chosen": 5.51953125, "rewards/rejected": -3.58203125, "rewards/accuracies": 0.90625, "rewards/margins": 9.1015625, "logps/chosen": -639.5, "logps/rejected": -732.0, "logits/chosen": 0.09173584, "logits/rejected": 0.13598633, "nll_loss": 0.43603516, "epoch": 0.48632219, "global_step/max_steps": "40/82", "percentage": "48.78%", "elapsed_time": "3h 4m 23s", "remaining_time": "3h 13m 36s"}
44
- {"eval_loss": 0.48624372, "eval_runtime": 284.3552, "eval_samples_per_second": 0.285, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 6.887784, "eval_rewards/rejected": -3.45239258, "eval_rewards/accuracies": 0.98863637, "eval_rewards/margins": 10.340909, "eval_logps/chosen": -722.36364746, "eval_logps/rejected": -852.36364746, "eval_logits/chosen": 0.06572377, "eval_logits/rejected": 0.16534978, "eval_nll_loss": 0.42294034, "epoch": 0.48632219, "global_step/max_steps": "40/82", "percentage": "48.78%", "elapsed_time": "3h 9m 8s", "remaining_time": "3h 18m 35s"}
45
- {"loss": 0.50234985, "grad_norm": 2.63879228, "learning_rate": 5.62e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.0035, "rewards/chosen": 8.921875, "rewards/rejected": 0.98828125, "rewards/accuracies": 0.96875, "rewards/margins": 7.921875, "logps/chosen": -954.0, "logps/rejected": -1100.0, "logits/chosen": 0.20483398, "logits/rejected": 0.25, "nll_loss": 0.43701172, "epoch": 0.49848024, "global_step/max_steps": "41/82", "percentage": "50.00%", "elapsed_time": "3h 14m 53s", "remaining_time": "3h 14m 53s"}
46
- {"loss": 0.42147827, "grad_norm": 0.57520199, "learning_rate": 5.48e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003532, "rewards/chosen": 7.21875, "rewards/rejected": -2.15625, "rewards/accuracies": 1.0, "rewards/margins": 9.375, "logps/chosen": -620.0, "logps/rejected": -770.0, "logits/chosen": 0.12142944, "logits/rejected": 0.21203613, "nll_loss": 0.41162109, "epoch": 0.5106383, "global_step/max_steps": "42/82", "percentage": "51.22%", "elapsed_time": "3h 17m 51s", "remaining_time": "3h 8m 26s"}
47
- {"loss": 0.46865845, "grad_norm": 0.44169736, "learning_rate": 5.34e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003538, "rewards/chosen": 8.25, "rewards/rejected": -2.00976562, "rewards/accuracies": 1.0, "rewards/margins": 10.2578125, "logps/chosen": -711.5, "logps/rejected": -801.0, "logits/chosen": 0.1260376, "logits/rejected": 0.18054199, "nll_loss": 0.46240234, "epoch": 0.52279635, "global_step/max_steps": "43/82", "percentage": "52.44%", "elapsed_time": "3h 22m 13s", "remaining_time": "3h 3m 24s"}
48
- {"loss": 0.46195984, "grad_norm": 0.35730186, "learning_rate": 5.21e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003547, "rewards/chosen": 9.609375, "rewards/rejected": 0.27978516, "rewards/accuracies": 1.0, "rewards/margins": 9.328125, "logps/chosen": -783.5, "logps/rejected": -843.5, "logits/chosen": 0.08758545, "logits/rejected": 0.12467957, "nll_loss": 0.45751953, "epoch": 0.53495441, "global_step/max_steps": "44/82", "percentage": "53.66%", "elapsed_time": "3h 26m 23s", "remaining_time": "2h 58m 15s"}
49
- {"loss": 0.4153595, "grad_norm": 0.56697857, "learning_rate": 5.07e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003562, "rewards/chosen": 6.69921875, "rewards/rejected": -1.41113281, "rewards/accuracies": 1.0, "rewards/margins": 8.1171875, "logps/chosen": -601.0, "logps/rejected": -666.5, "logits/chosen": 0.12435913, "logits/rejected": 0.16799927, "nll_loss": 0.40234375, "epoch": 0.54711246, "global_step/max_steps": "45/82", "percentage": "54.88%", "elapsed_time": "3h 30m 13s", "remaining_time": "2h 52m 51s"}
50
- {"loss": 0.49664307, "grad_norm": 0.24403928, "learning_rate": 4.93e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003581, "rewards/chosen": 7.25, "rewards/rejected": -2.03076172, "rewards/accuracies": 1.0, "rewards/margins": 9.25, "logps/chosen": -719.5, "logps/rejected": -824.0, "logits/chosen": 0.14086914, "logits/rejected": 0.21032715, "nll_loss": 0.49511719, "epoch": 0.55927052, "global_step/max_steps": "46/82", "percentage": "56.10%", "elapsed_time": "3h 33m 45s", "remaining_time": "2h 47m 17s"}
51
- {"loss": 0.40992737, "grad_norm": 0.31083292, "learning_rate": 4.79e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003593, "rewards/chosen": 8.3515625, "rewards/rejected": -1.38183594, "rewards/accuracies": 1.0, "rewards/margins": 9.75, "logps/chosen": -724.5, "logps/rejected": -821.0, "logits/chosen": 0.11798096, "logits/rejected": 0.18908691, "nll_loss": 0.40576172, "epoch": 0.57142857, "global_step/max_steps": "47/82", "percentage": "57.32%", "elapsed_time": "3h 37m 41s", "remaining_time": "2h 42m 6s"}
52
- {"loss": 0.45254517, "grad_norm": 1.89613712, "learning_rate": 4.66e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003616, "rewards/chosen": 8.125, "rewards/rejected": -0.14941406, "rewards/accuracies": 1.0, "rewards/margins": 8.2890625, "logps/chosen": -689.5, "logps/rejected": -783.0, "logits/chosen": 0.09466553, "logits/rejected": 0.15246582, "nll_loss": 0.4140625, "epoch": 0.58358663, "global_step/max_steps": "48/82", "percentage": "58.54%", "elapsed_time": "3h 40m 52s", "remaining_time": "2h 36m 27s"}
53
- {"loss": 0.43908691, "grad_norm": 0.35176185, "learning_rate": 4.52e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003614, "rewards/chosen": 9.40625, "rewards/rejected": 0.77832031, "rewards/accuracies": 1.0, "rewards/margins": 8.6484375, "logps/chosen": -751.0, "logps/rejected": -850.0, "logits/chosen": 0.18481445, "logits/rejected": 0.22509766, "nll_loss": 0.43066406, "epoch": 0.59574468, "global_step/max_steps": "49/82", "percentage": "59.76%", "elapsed_time": "3h 45m 37s", "remaining_time": "2h 31m 57s"}
54
- {"loss": 0.4453125, "grad_norm": 2.43514061, "learning_rate": 4.38e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.00361, "rewards/chosen": 10.4296875, "rewards/rejected": 0.18334961, "rewards/accuracies": 0.96875, "rewards/margins": 10.25, "logps/chosen": -878.0, "logps/rejected": -904.0, "logits/chosen": 0.18847656, "logits/rejected": 0.19177246, "nll_loss": 0.40380859, "epoch": 0.60790274, "global_step/max_steps": "50/82", "percentage": "60.98%", "elapsed_time": "3h 50m 30s", "remaining_time": "2h 27m 31s"}
55
- {"eval_loss": 0.44147858, "eval_runtime": 284.8285, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 8.55113602, "eval_rewards/rejected": -2.08753562, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.64488602, "eval_logps/chosen": -706.45452881, "eval_logps/rejected": -838.18182373, "eval_logits/chosen": 0.04702343, "eval_logits/rejected": 0.15043502, "eval_nll_loss": 0.40909091, "epoch": 0.60790274, "global_step/max_steps": "50/82", "percentage": "60.98%", "elapsed_time": "3h 55m 15s", "remaining_time": "2h 30m 33s"}
56
- {"loss": 0.5045166, "grad_norm": 0.51986378, "learning_rate": 4.25e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003525, "rewards/chosen": 10.125, "rewards/rejected": 0.81738281, "rewards/accuracies": 1.0, "rewards/margins": 9.3046875, "logps/chosen": -739.0, "logps/rejected": -773.0, "logits/chosen": 0.09991455, "logits/rejected": 0.13964844, "nll_loss": 0.49414062, "epoch": 0.62006079, "global_step/max_steps": "51/82", "percentage": "62.20%", "elapsed_time": "4h 0m 49s", "remaining_time": "2h 26m 23s"}
57
- {"loss": 0.46807861, "grad_norm": 7.00124788, "learning_rate": 4.11e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.00354, "rewards/chosen": 7.921875, "rewards/rejected": -1.58203125, "rewards/accuracies": 0.96875, "rewards/margins": 9.5, "logps/chosen": -719.5, "logps/rejected": -830.0, "logits/chosen": 0.06224823, "logits/rejected": 0.1451416, "nll_loss": 0.40527344, "epoch": 0.63221884, "global_step/max_steps": "52/82", "percentage": "63.41%", "elapsed_time": "4h 4m 29s", "remaining_time": "2h 21m 3s"}
58
- {"loss": 0.3977356, "grad_norm": 0.31328031, "learning_rate": 3.97e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003543, "rewards/chosen": 10.125, "rewards/rejected": 1.37768555, "rewards/accuracies": 1.0, "rewards/margins": 8.75, "logps/chosen": -764.0, "logps/rejected": -885.0, "logits/chosen": 0.11346436, "logits/rejected": 0.171875, "nll_loss": 0.39257812, "epoch": 0.6443769, "global_step/max_steps": "53/82", "percentage": "64.63%", "elapsed_time": "4h 9m 0s", "remaining_time": "2h 16m 14s"}
59
- {"loss": 0.48883057, "grad_norm": 2.52998948, "learning_rate": 3.84e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003553, "rewards/chosen": 9.8828125, "rewards/rejected": -0.39453125, "rewards/accuracies": 0.96875, "rewards/margins": 10.2578125, "logps/chosen": -528.5, "logps/rejected": -632.5, "logits/chosen": 0.12454224, "logits/rejected": 0.1741333, "nll_loss": 0.41503906, "epoch": 0.65653495, "global_step/max_steps": "54/82", "percentage": "65.85%", "elapsed_time": "4h 12m 56s", "remaining_time": "2h 11m 9s"}
60
- {"loss": 0.42504883, "grad_norm": 0.18627305, "learning_rate": 3.7e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003573, "rewards/chosen": 8.3359375, "rewards/rejected": -1.53417969, "rewards/accuracies": 1.0, "rewards/margins": 9.859375, "logps/chosen": -625.5, "logps/rejected": -785.5, "logits/chosen": 0.0847168, "logits/rejected": 0.18884277, "nll_loss": 0.42382812, "epoch": 0.66869301, "global_step/max_steps": "55/82", "percentage": "67.07%", "elapsed_time": "4h 16m 15s", "remaining_time": "2h 5m 47s"}
61
- {"loss": 0.47116089, "grad_norm": 0.50617564, "learning_rate": 3.56e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003583, "rewards/chosen": 9.125, "rewards/rejected": -1.52001953, "rewards/accuracies": 1.0, "rewards/margins": 10.6328125, "logps/chosen": -660.5, "logps/rejected": -714.0, "logits/chosen": 0.0710144, "logits/rejected": 0.1244812, "nll_loss": 0.46386719, "epoch": 0.68085106, "global_step/max_steps": "56/82", "percentage": "68.29%", "elapsed_time": "4h 20m 7s", "remaining_time": "2h 0m 46s"}
62
- {"loss": 0.4299469, "grad_norm": 0.30912438, "learning_rate": 3.42e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003592, "rewards/chosen": 10.7265625, "rewards/rejected": 0.171875, "rewards/accuracies": 1.0, "rewards/margins": 10.5625, "logps/chosen": -822.5, "logps/rejected": -881.0, "logits/chosen": 0.10540771, "logits/rejected": 0.15472412, "nll_loss": 0.42675781, "epoch": 0.69300912, "global_step/max_steps": "57/82", "percentage": "69.51%", "elapsed_time": "4h 24m 9s", "remaining_time": "1h 55m 51s"}
63
- {"loss": 0.43502808, "grad_norm": 1.00541997, "learning_rate": 3.29e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003597, "rewards/chosen": 9.671875, "rewards/rejected": 0.00976562, "rewards/accuracies": 1.0, "rewards/margins": 9.65625, "logps/chosen": -764.0, "logps/rejected": -835.0, "logits/chosen": 0.1048584, "logits/rejected": 0.16162109, "nll_loss": 0.41748047, "epoch": 0.70516717, "global_step/max_steps": "58/82", "percentage": "70.73%", "elapsed_time": "4h 28m 25s", "remaining_time": "1h 51m 4s"}
64
- {"loss": 0.45611572, "grad_norm": 2.08780837, "learning_rate": 3.15e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003604, "rewards/chosen": 8.96875, "rewards/rejected": -0.65820312, "rewards/accuracies": 1.0, "rewards/margins": 9.6484375, "logps/chosen": -673.0, "logps/rejected": -851.0, "logits/chosen": 0.11431885, "logits/rejected": 0.20141602, "nll_loss": 0.43164062, "epoch": 0.71732523, "global_step/max_steps": "59/82", "percentage": "71.95%", "elapsed_time": "4h 32m 30s", "remaining_time": "1h 46m 13s"}
65
- {"loss": 0.44345093, "grad_norm": 1.43791628, "learning_rate": 3.01e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.00361, "rewards/chosen": 9.046875, "rewards/rejected": -0.90625, "rewards/accuracies": 0.96875, "rewards/margins": 9.9609375, "logps/chosen": -601.0, "logps/rejected": -709.0, "logits/chosen": 0.08343506, "logits/rejected": 0.15600586, "nll_loss": 0.4140625, "epoch": 0.72948328, "global_step/max_steps": "60/82", "percentage": "73.17%", "elapsed_time": "4h 36m 38s", "remaining_time": "1h 41m 26s"}
66
- {"eval_loss": 0.43094134, "eval_runtime": 284.6835, "eval_samples_per_second": 0.285, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 9.88068199, "eval_rewards/rejected": -0.98508525, "eval_rewards/accuracies": 1.0, "eval_rewards/margins": 10.86931801, "eval_logps/chosen": -693.45452881, "eval_logps/rejected": -827.09088135, "eval_logits/chosen": 0.03915128, "eval_logits/rejected": 0.14150724, "eval_nll_loss": 0.39879262, "epoch": 0.72948328, "global_step/max_steps": "60/82", "percentage": "73.17%", "elapsed_time": "4h 41m 23s", "remaining_time": "1h 43m 10s"}
67
- {"loss": 0.43286133, "grad_norm": 2.25892663, "learning_rate": 2.88e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003542, "rewards/chosen": 10.84375, "rewards/rejected": 1.16357422, "rewards/accuracies": 1.0, "rewards/margins": 9.6875, "logps/chosen": -717.0, "logps/rejected": -814.0, "logits/chosen": 0.07861328, "logits/rejected": 0.12997437, "nll_loss": 0.41552734, "epoch": 0.74164134, "global_step/max_steps": "61/82", "percentage": "74.39%", "elapsed_time": "4h 46m 41s", "remaining_time": "1h 38m 41s"}
68
- {"loss": 0.40936279, "grad_norm": 0.43523028, "learning_rate": 2.74e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003554, "rewards/chosen": 10.75, "rewards/rejected": 1.65234375, "rewards/accuracies": 1.0, "rewards/margins": 9.109375, "logps/chosen": -653.0, "logps/rejected": -777.0, "logits/chosen": 0.1078186, "logits/rejected": 0.18383789, "nll_loss": 0.40136719, "epoch": 0.75379939, "global_step/max_steps": "62/82", "percentage": "75.61%", "elapsed_time": "4h 50m 26s", "remaining_time": "1h 33m 41s"}
69
- {"loss": 0.41888428, "grad_norm": 0.43725225, "learning_rate": 2.6e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003565, "rewards/chosen": 11.75, "rewards/rejected": 1.87695312, "rewards/accuracies": 1.0, "rewards/margins": 9.875, "logps/chosen": -747.0, "logps/rejected": -866.0, "logits/chosen": 0.09594727, "logits/rejected": 0.16003418, "nll_loss": 0.41162109, "epoch": 0.76595745, "global_step/max_steps": "63/82", "percentage": "76.83%", "elapsed_time": "4h 54m 10s", "remaining_time": "1h 28m 43s"}
70
- {"loss": 0.49224854, "grad_norm": 2.68773913, "learning_rate": 2.47e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003575, "rewards/chosen": 11.0625, "rewards/rejected": 2.11816406, "rewards/accuracies": 0.96875, "rewards/margins": 8.9375, "logps/chosen": -721.0, "logps/rejected": -796.0, "logits/chosen": 0.11767578, "logits/rejected": 0.17553711, "nll_loss": 0.43261719, "epoch": 0.7781155, "global_step/max_steps": "64/82", "percentage": "78.05%", "elapsed_time": "4h 58m 1s", "remaining_time": "1h 23m 49s"}
71
- {"loss": 0.41763306, "grad_norm": 0.25608841, "learning_rate": 2.33e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003594, "rewards/chosen": 9.4453125, "rewards/rejected": -0.87890625, "rewards/accuracies": 1.0, "rewards/margins": 10.34375, "logps/chosen": -518.0, "logps/rejected": -618.0, "logits/chosen": 0.03510284, "logits/rejected": 0.12524414, "nll_loss": 0.41210938, "epoch": 0.79027356, "global_step/max_steps": "65/82", "percentage": "79.27%", "elapsed_time": "5h 1m 5s", "remaining_time": "1h 18m 44s"}
72
- {"loss": 0.41522217, "grad_norm": 0.31340778, "learning_rate": 2.19e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003598, "rewards/chosen": 11.6875, "rewards/rejected": 1.69335938, "rewards/accuracies": 1.0, "rewards/margins": 10.0, "logps/chosen": -741.0, "logps/rejected": -918.0, "logits/chosen": 0.11352539, "logits/rejected": 0.20507812, "nll_loss": 0.41015625, "epoch": 0.80243161, "global_step/max_steps": "66/82", "percentage": "80.49%", "elapsed_time": "5h 5m 24s", "remaining_time": "1h 14m 2s"}
73
- {"loss": 0.4039917, "grad_norm": 0.29040954, "learning_rate": 2.05e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003602, "rewards/chosen": 13.609375, "rewards/rejected": 2.3828125, "rewards/accuracies": 1.0, "rewards/margins": 11.25, "logps/chosen": -754.0, "logps/rejected": -970.0, "logits/chosen": 0.06204987, "logits/rejected": 0.16821289, "nll_loss": 0.39990234, "epoch": 0.81458967, "global_step/max_steps": "67/82", "percentage": "81.71%", "elapsed_time": "5h 9m 38s", "remaining_time": "1h 9m 19s"}
74
- {"loss": 0.52520752, "grad_norm": 3.47112751, "learning_rate": 1.92e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003611, "rewards/chosen": 7.9140625, "rewards/rejected": -0.94506836, "rewards/accuracies": 0.96875, "rewards/margins": 8.8671875, "logps/chosen": -517.0, "logps/rejected": -629.0, "logits/chosen": 0.05895996, "logits/rejected": 0.13134766, "nll_loss": 0.41259766, "epoch": 0.82674772, "global_step/max_steps": "68/82", "percentage": "82.93%", "elapsed_time": "5h 13m 30s", "remaining_time": "1h 4m 32s"}
75
- {"loss": 0.44619751, "grad_norm": 1.94311345, "learning_rate": 1.78e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003616, "rewards/chosen": 9.4765625, "rewards/rejected": -0.58984375, "rewards/accuracies": 0.96875, "rewards/margins": 10.046875, "logps/chosen": -616.0, "logps/rejected": -728.0, "logits/chosen": 0.10736084, "logits/rejected": 0.16064453, "nll_loss": 0.41943359, "epoch": 0.83890578, "global_step/max_steps": "69/82", "percentage": "84.15%", "elapsed_time": "5h 17m 40s", "remaining_time": "59m 51s"}
76
- {"loss": 0.40203857, "grad_norm": 0.17752735, "learning_rate": 1.64e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003622, "rewards/chosen": 12.2421875, "rewards/rejected": 2.45703125, "rewards/accuracies": 1.0, "rewards/margins": 9.828125, "logps/chosen": -790.0, "logps/rejected": -922.0, "logits/chosen": 0.14575195, "logits/rejected": 0.19726562, "nll_loss": 0.39941406, "epoch": 0.85106383, "global_step/max_steps": "70/82", "percentage": "85.37%", "elapsed_time": "5h 21m 45s", "remaining_time": "55m 9s"}
77
- {"eval_loss": 0.42657697, "eval_runtime": 285.0897, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 10.69318199, "eval_rewards/rejected": -0.41974431, "eval_rewards/accuracies": 0.98863637, "eval_rewards/margins": 11.122159, "eval_logps/chosen": -684.09088135, "eval_logps/rejected": -821.63635254, "eval_logits/chosen": 0.0332919, "eval_logits/rejected": 0.13420798, "eval_nll_loss": 0.39213422, "epoch": 0.85106383, "global_step/max_steps": "70/82", "percentage": "85.37%", "elapsed_time": "5h 26m 30s", "remaining_time": "55m 58s"}
78
- {"loss": 0.41540527, "grad_norm": 0.22874653, "learning_rate": 1.51e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.00356, "rewards/chosen": 13.546875, "rewards/rejected": 1.49804688, "rewards/accuracies": 1.0, "rewards/margins": 12.078125, "logps/chosen": -801.0, "logps/rejected": -911.0, "logits/chosen": 0.08815002, "logits/rejected": 0.14550781, "nll_loss": 0.41357422, "epoch": 0.86322188, "global_step/max_steps": "71/82", "percentage": "86.59%", "elapsed_time": "5h 32m 4s", "remaining_time": "51m 26s"}
79
- {"loss": 0.37530518, "grad_norm": 0.69678283, "learning_rate": 1.37e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003563, "rewards/chosen": 12.421875, "rewards/rejected": 1.99316406, "rewards/accuracies": 1.0, "rewards/margins": 10.4296875, "logps/chosen": -702.0, "logps/rejected": -829.0, "logits/chosen": 0.0859375, "logits/rejected": 0.16503906, "nll_loss": 0.36865234, "epoch": 0.87537994, "global_step/max_steps": "72/82", "percentage": "87.80%", "elapsed_time": "5h 36m 28s", "remaining_time": "46m 43s"}
80
- {"loss": 0.39328003, "grad_norm": 0.21047515, "learning_rate": 1.23e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.00357, "rewards/chosen": 9.25, "rewards/rejected": -1.953125, "rewards/accuracies": 1.0, "rewards/margins": 11.203125, "logps/chosen": -568.5, "logps/rejected": -729.0, "logits/chosen": 0.05722809, "logits/rejected": 0.15216064, "nll_loss": 0.39111328, "epoch": 0.88753799, "global_step/max_steps": "73/82", "percentage": "89.02%", "elapsed_time": "5h 40m 29s", "remaining_time": "41m 58s"}
81
- {"loss": 0.38693237, "grad_norm": 0.47950187, "learning_rate": 1.1e-06, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003572, "rewards/chosen": 13.6875, "rewards/rejected": 1.81835938, "rewards/accuracies": 1.0, "rewards/margins": 11.890625, "logps/chosen": -688.0, "logps/rejected": -799.0, "logits/chosen": 0.09814453, "logits/rejected": 0.14685059, "nll_loss": 0.37695312, "epoch": 0.89969605, "global_step/max_steps": "74/82", "percentage": "90.24%", "elapsed_time": "5h 44m 57s", "remaining_time": "37m 17s"}
82
- {"loss": 0.44073486, "grad_norm": 0.1730583, "learning_rate": 9.6e-07, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.00358, "rewards/chosen": 11.90625, "rewards/rejected": 1.73242188, "rewards/accuracies": 1.0, "rewards/margins": 10.15625, "logps/chosen": -686.0, "logps/rejected": -771.0, "logits/chosen": 0.05981445, "logits/rejected": 0.12109375, "nll_loss": 0.43945312, "epoch": 0.9118541, "global_step/max_steps": "75/82", "percentage": "91.46%", "elapsed_time": "5h 48m 48s", "remaining_time": "32m 33s"}
83
- {"loss": 0.40435791, "grad_norm": 0.19026437, "learning_rate": 8.2e-07, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003588, "rewards/chosen": 10.734375, "rewards/rejected": -0.27099609, "rewards/accuracies": 1.0, "rewards/margins": 11.015625, "logps/chosen": -636.0, "logps/rejected": -758.0, "logits/chosen": 0.11766052, "logits/rejected": 0.15029907, "nll_loss": 0.40332031, "epoch": 0.92401216, "global_step/max_steps": "76/82", "percentage": "92.68%", "elapsed_time": "5h 52m 43s", "remaining_time": "27m 50s"}
84
- {"loss": 0.38546753, "grad_norm": 0.24236318, "learning_rate": 6.8e-07, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003603, "rewards/chosen": 12.9375, "rewards/rejected": -0.52246094, "rewards/accuracies": 1.0, "rewards/margins": 13.46875, "logps/chosen": -569.0, "logps/rejected": -638.0, "logits/chosen": 0.07489777, "logits/rejected": 0.09332275, "nll_loss": 0.38330078, "epoch": 0.93617021, "global_step/max_steps": "77/82", "percentage": "93.90%", "elapsed_time": "5h 55m 50s", "remaining_time": "23m 6s"}
85
- {"loss": 0.41549683, "grad_norm": 1.45089507, "learning_rate": 5.5e-07, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003605, "rewards/chosen": 11.15625, "rewards/rejected": -0.00439453, "rewards/accuracies": 1.0, "rewards/margins": 11.171875, "logps/chosen": -622.0, "logps/rejected": -796.0, "logits/chosen": 0.14172363, "logits/rejected": 0.22143555, "nll_loss": 0.40722656, "epoch": 0.94832827, "global_step/max_steps": "78/82", "percentage": "95.12%", "elapsed_time": "6h 0m 13s", "remaining_time": "18m 28s"}
86
- {"loss": 0.44415283, "grad_norm": 0.21063523, "learning_rate": 4.1e-07, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003613, "rewards/chosen": 10.625, "rewards/rejected": 0.20446777, "rewards/accuracies": 1.0, "rewards/margins": 10.40625, "logps/chosen": -627.0, "logps/rejected": -710.0, "logits/chosen": 0.07611084, "logits/rejected": 0.11773682, "nll_loss": 0.44140625, "epoch": 0.96048632, "global_step/max_steps": "79/82", "percentage": "96.34%", "elapsed_time": "6h 4m 3s", "remaining_time": "13m 49s"}
87
- {"loss": 0.38951111, "grad_norm": 0.21627389, "learning_rate": 2.7e-07, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003624, "rewards/chosen": 12.3125, "rewards/rejected": 0.78027344, "rewards/accuracies": 1.0, "rewards/margins": 11.53125, "logps/chosen": -592.0, "logps/rejected": -684.0, "logits/chosen": 0.11889648, "logits/rejected": 0.15625, "nll_loss": 0.38867188, "epoch": 0.97264438, "global_step/max_steps": "80/82", "percentage": "97.56%", "elapsed_time": "6h 7m 37s", "remaining_time": "9m 11s"}
88
- {"eval_loss": 0.42359906, "eval_runtime": 285.0331, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 11.346591, "eval_rewards/rejected": 0.14364347, "eval_rewards/accuracies": 0.98863637, "eval_rewards/margins": 11.20454502, "eval_logps/chosen": -677.54547119, "eval_logps/rejected": -816.36364746, "eval_logits/chosen": 0.03244851, "eval_logits/rejected": 0.13173328, "eval_nll_loss": 0.38751775, "epoch": 0.97264438, "global_step/max_steps": "80/82", "percentage": "97.56%", "elapsed_time": "6h 12m 22s", "remaining_time": "9m 18s"}
89
- {"loss": 0.36994934, "grad_norm": 0.4961237, "learning_rate": 1.4e-07, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003582, "rewards/chosen": 10.75, "rewards/rejected": 0.72167969, "rewards/accuracies": 1.0, "rewards/margins": 10.046875, "logps/chosen": -616.0, "logps/rejected": -790.0, "logits/chosen": 0.03601074, "logits/rejected": 0.12182617, "nll_loss": 0.36132812, "epoch": 0.98480243, "global_step/max_steps": "81/82", "percentage": "98.78%", "elapsed_time": "6h 16m 34s", "remaining_time": "4m 38s"}
90
- {"loss": 0.45687866, "grad_norm": 0.70900959, "learning_rate": 0.0, "memory(GiB)": 133.48, "train_speed(iter/s)": 0.003593, "rewards/chosen": 13.859375, "rewards/rejected": 4.22070312, "rewards/accuracies": 1.0, "rewards/margins": 9.640625, "logps/chosen": -892.0, "logps/rejected": -938.0, "logits/chosen": 0.13778687, "logits/rejected": 0.16650391, "nll_loss": 0.44189453, "epoch": 0.99696049, "global_step/max_steps": "82/82", "percentage": "100.00%", "elapsed_time": "6h 20m 1s", "remaining_time": "0s"}
91
- {"eval_loss": 0.42223668, "eval_runtime": 284.8613, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.039, "eval_rewards/chosen": 11.346591, "eval_rewards/rejected": 0.18328303, "eval_rewards/accuracies": 0.98863637, "eval_rewards/margins": 11.159091, "eval_logps/chosen": -677.54547119, "eval_logps/rejected": -815.63635254, "eval_logits/chosen": 0.03209339, "eval_logits/rejected": 0.13144475, "eval_nll_loss": 0.38742897, "epoch": 0.99696049, "global_step/max_steps": "82/82", "percentage": "100.00%", "elapsed_time": "6h 24m 46s", "remaining_time": "0s"}
92
- {"train_runtime": 23141.2942, "train_samples_per_second": 0.114, "train_steps_per_second": 0.004, "total_flos": 71494911983616.0, "train_loss": 0.54884692, "epoch": 0.99696049, "global_step/max_steps": "82/82", "percentage": "100.00%", "elapsed_time": "6h 25m 39s", "remaining_time": "0s"}
 
1
+ {"loss": 1.10846329, "token_acc": 0.76005025, "grad_norm": 16.77027702, "learning_rate": 5e-08, "memory(GiB)": 29.76, "train_speed(iter/s)": 0.020907, "epoch": 0.00053505, "global_step/max_steps": "1/1869", "percentage": "0.05%", "elapsed_time": "22s", "remaining_time": "11h 50m 24s"}
2
+ {"loss": 1.12982225, "token_acc": 0.68363636, "grad_norm": 21.60894012, "learning_rate": 1.1e-07, "memory(GiB)": 36.2, "train_speed(iter/s)": 0.026755, "epoch": 0.00107009, "global_step/max_steps": "2/1869", "percentage": "0.11%", "elapsed_time": "49s", "remaining_time": "12h 53m 52s"}
3
+ {"loss": 1.07696795, "token_acc": 0.72697003, "grad_norm": 14.1061182, "learning_rate": 1.6e-07, "memory(GiB)": 36.2, "train_speed(iter/s)": 0.030209, "epoch": 0.00160514, "global_step/max_steps": "3/1869", "percentage": "0.16%", "elapsed_time": "1m 14s", "remaining_time": "12h 50m 11s"}
4
+ {"loss": 1.1647048, "token_acc": 0.72707889, "grad_norm": 17.34974098, "learning_rate": 2.1e-07, "memory(GiB)": 36.2, "train_speed(iter/s)": 0.032071, "epoch": 0.00214018, "global_step/max_steps": "4/1869", "percentage": "0.21%", "elapsed_time": "1m 39s", "remaining_time": "12h 54m 49s"}
5
+ {"loss": 1.03058517, "token_acc": 0.78461538, "grad_norm": 13.97839832, "learning_rate": 2.7e-07, "memory(GiB)": 38.16, "train_speed(iter/s)": 0.033244, "epoch": 0.00267523, "global_step/max_steps": "5/1869", "percentage": "0.27%", "elapsed_time": "2m 5s", "remaining_time": "12h 59m 5s"}
6
+ {"loss": 1.07988381, "token_acc": 0.75729927, "grad_norm": 14.92414665, "learning_rate": 3.2e-07, "memory(GiB)": 38.16, "train_speed(iter/s)": 0.03754, "epoch": 0.00321027, "global_step/max_steps": "6/1869", "percentage": "0.32%", "elapsed_time": "2m 14s", "remaining_time": "11h 37m 39s"}
7
+ {"loss": 1.02987361, "token_acc": 0.68942548, "grad_norm": 9.45815849, "learning_rate": 3.7e-07, "memory(GiB)": 38.16, "train_speed(iter/s)": 0.042107, "epoch": 0.00374532, "global_step/max_steps": "7/1869", "percentage": "0.37%", "elapsed_time": "2m 21s", "remaining_time": "10h 26m 6s"}
8
+ {"loss": 1.07785511, "token_acc": 0.7925, "grad_norm": 10.87460232, "learning_rate": 4.3e-07, "memory(GiB)": 38.16, "train_speed(iter/s)": 0.046302, "epoch": 0.00428036, "global_step/max_steps": "8/1869", "percentage": "0.43%", "elapsed_time": "2m 27s", "remaining_time": "9h 32m 53s"}
9
+ {"loss": 1.01309848, "token_acc": 0.78978622, "grad_norm": 10.11265278, "learning_rate": 4.8e-07, "memory(GiB)": 38.16, "train_speed(iter/s)": 0.049079, "epoch": 0.00481541, "global_step/max_steps": "9/1869", "percentage": "0.48%", "elapsed_time": "2m 38s", "remaining_time": "9h 5m 28s"}
10
+ {"loss": 0.97182792, "token_acc": 0.77007299, "grad_norm": 9.2328577, "learning_rate": 5.3e-07, "memory(GiB)": 38.16, "train_speed(iter/s)": 0.052715, "epoch": 0.00535045, "global_step/max_steps": "10/1869", "percentage": "0.54%", "elapsed_time": "2m 44s", "remaining_time": "8h 30m 14s"}
11
+ {"eval_loss": 0.93822712, "eval_token_acc": 0.77502154, "eval_runtime": 230.5467, "eval_samples_per_second": 2.004, "eval_steps_per_second": 0.252, "epoch": 0.00535045, "global_step/max_steps": "10/1869", "percentage": "0.54%", "elapsed_time": "6m 35s", "remaining_time": "20h 24m 33s"}
12
+ {"loss": 0.8288908, "token_acc": 0.77817543, "grad_norm": 6.67973661, "learning_rate": 5.9e-07, "memory(GiB)": 112.78, "train_speed(iter/s)": 0.022076, "epoch": 0.0058855, "global_step/max_steps": "11/1869", "percentage": "0.59%", "elapsed_time": "7m 53s", "remaining_time": "22h 12m 20s"}
13
+ {"loss": 0.86488545, "token_acc": 0.74363057, "grad_norm": 10.70631313, "learning_rate": 6.4e-07, "memory(GiB)": 112.78, "train_speed(iter/s)": 0.022881, "epoch": 0.00642055, "global_step/max_steps": "12/1869", "percentage": "0.64%", "elapsed_time": "8m 19s", "remaining_time": "21h 28m 6s"}
14
+ {"loss": 1.00706434, "token_acc": 0.72903226, "grad_norm": 9.83619118, "learning_rate": 7e-07, "memory(GiB)": 118.65, "train_speed(iter/s)": 0.024344, "epoch": 0.00695559, "global_step/max_steps": "13/1869", "percentage": "0.70%", "elapsed_time": "8m 28s", "remaining_time": "20h 11m 8s"}
15
+ {"loss": 0.74669749, "token_acc": 0.83842239, "grad_norm": 5.23266602, "learning_rate": 7.5e-07, "memory(GiB)": 118.65, "train_speed(iter/s)": 0.024775, "epoch": 0.00749064, "global_step/max_steps": "14/1869", "percentage": "0.75%", "elapsed_time": "9m 0s", "remaining_time": "19h 52m 39s"}
16
+ {"loss": 0.8471846, "token_acc": 0.78861789, "grad_norm": 6.47993088, "learning_rate": 8e-07, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.025013, "epoch": 0.00802568, "global_step/max_steps": "15/1869", "percentage": "0.80%", "elapsed_time": "9m 34s", "remaining_time": "19h 43m 49s"}
17
+ {"loss": 0.77965558, "token_acc": 0.81611208, "grad_norm": 5.04291487, "learning_rate": 8.6e-07, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.026421, "epoch": 0.00856073, "global_step/max_steps": "16/1869", "percentage": "0.86%", "elapsed_time": "9m 40s", "remaining_time": "18h 40m 36s"}
18
+ {"loss": 0.80958372, "token_acc": 0.75545852, "grad_norm": 5.18472624, "learning_rate": 9.1e-07, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.027765, "epoch": 0.00909577, "global_step/max_steps": "17/1869", "percentage": "0.91%", "elapsed_time": "9m 47s", "remaining_time": "17h 46m 18s"}
19
+ {"loss": 0.70913279, "token_acc": 0.85760518, "grad_norm": 5.61203241, "learning_rate": 9.6e-07, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.029081, "epoch": 0.00963082, "global_step/max_steps": "18/1869", "percentage": "0.96%", "elapsed_time": "9m 53s", "remaining_time": "16h 57m 58s"}
20
+ {"loss": 0.72859496, "token_acc": 0.80189673, "grad_norm": 9.47189426, "learning_rate": 1.02e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.030258, "epoch": 0.01016586, "global_step/max_steps": "19/1869", "percentage": "1.02%", "elapsed_time": "10m 2s", "remaining_time": "16h 18m 25s"}
21
+ {"loss": 0.73913312, "token_acc": 0.73609314, "grad_norm": 4.01692152, "learning_rate": 1.07e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.030973, "epoch": 0.01070091, "global_step/max_steps": "20/1869", "percentage": "1.07%", "elapsed_time": "10m 20s", "remaining_time": "15h 56m 23s"}
22
+ {"eval_loss": 0.73671097, "eval_token_acc": 0.79061684, "eval_runtime": 230.7649, "eval_samples_per_second": 2.002, "eval_steps_per_second": 0.251, "epoch": 0.01070091, "global_step/max_steps": "20/1869", "percentage": "1.07%", "elapsed_time": "14m 11s", "remaining_time": "21h 51m 58s"}
23
+ {"loss": 0.71632719, "token_acc": 0.79510949, "grad_norm": 5.31815338, "learning_rate": 1.12e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022449, "epoch": 0.01123596, "global_step/max_steps": "21/1869", "percentage": "1.12%", "elapsed_time": "15m 10s", "remaining_time": "22h 15m 19s"}
24
+ {"loss": 0.75135165, "token_acc": 0.77522936, "grad_norm": 4.08145428, "learning_rate": 1.18e-06, "memory(GiB)": 129.17, "train_speed(iter/s)": 0.022744, "epoch": 0.011771, "global_step/max_steps": "22/1869", "percentage": "1.18%", "elapsed_time": "15m 42s", "remaining_time": "21h 58m 26s"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
special_tokens_map.json CHANGED
@@ -14,7 +14,7 @@
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|eot_id|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|finetune_right_pad_id|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393
3
- size 17209961
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
+ size 17209920
tokenizer_config.json CHANGED
@@ -33,7 +33,7 @@
33
  "special": true
34
  },
35
  "128004": {
36
- "content": "<|reserved_special_token_2|>",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  "special": true
42
  },
43
  "128005": {
44
- "content": "<|reserved_special_token_3|>",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
@@ -65,7 +65,7 @@
65
  "special": true
66
  },
67
  "128008": {
68
- "content": "<|reserved_special_token_4|>",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": true
82
  },
83
  "128010": {
84
- "content": "<|reserved_special_token_5|>",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": true
90
  },
91
  "128011": {
92
- "content": "<|reserved_special_token_6|>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": true
98
  },
99
  "128012": {
100
- "content": "<|reserved_special_token_7|>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": true
106
  },
107
  "128013": {
108
- "content": "<|reserved_special_token_8|>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": true
114
  },
115
  "128014": {
116
- "content": "<|reserved_special_token_9|>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": true
122
  },
123
  "128015": {
124
- "content": "<|reserved_special_token_10|>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": true
130
  },
131
  "128016": {
132
- "content": "<|reserved_special_token_11|>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": true
138
  },
139
  "128017": {
140
- "content": "<|reserved_special_token_12|>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": true
146
  },
147
  "128018": {
148
- "content": "<|reserved_special_token_13|>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": true
154
  },
155
  "128019": {
156
- "content": "<|reserved_special_token_14|>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": true
162
  },
163
  "128020": {
164
- "content": "<|reserved_special_token_15|>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": true
170
  },
171
  "128021": {
172
- "content": "<|reserved_special_token_16|>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": true
178
  },
179
  "128022": {
180
- "content": "<|reserved_special_token_17|>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
@@ -185,7 +185,7 @@
185
  "special": true
186
  },
187
  "128023": {
188
- "content": "<|reserved_special_token_18|>",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
@@ -193,7 +193,7 @@
193
  "special": true
194
  },
195
  "128024": {
196
- "content": "<|reserved_special_token_19|>",
197
  "lstrip": false,
198
  "normalized": false,
199
  "rstrip": false,
@@ -201,7 +201,7 @@
201
  "special": true
202
  },
203
  "128025": {
204
- "content": "<|reserved_special_token_20|>",
205
  "lstrip": false,
206
  "normalized": false,
207
  "rstrip": false,
@@ -209,7 +209,7 @@
209
  "special": true
210
  },
211
  "128026": {
212
- "content": "<|reserved_special_token_21|>",
213
  "lstrip": false,
214
  "normalized": false,
215
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  "special": true
218
  },
219
  "128027": {
220
- "content": "<|reserved_special_token_22|>",
221
  "lstrip": false,
222
  "normalized": false,
223
  "rstrip": false,
@@ -225,7 +225,7 @@
225
  "special": true
226
  },
227
  "128028": {
228
- "content": "<|reserved_special_token_23|>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
@@ -233,7 +233,7 @@
233
  "special": true
234
  },
235
  "128029": {
236
- "content": "<|reserved_special_token_24|>",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
@@ -241,7 +241,7 @@
241
  "special": true
242
  },
243
  "128030": {
244
- "content": "<|reserved_special_token_25|>",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
@@ -249,7 +249,7 @@
249
  "special": true
250
  },
251
  "128031": {
252
- "content": "<|reserved_special_token_26|>",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
@@ -257,7 +257,7 @@
257
  "special": true
258
  },
259
  "128032": {
260
- "content": "<|reserved_special_token_27|>",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
@@ -265,7 +265,7 @@
265
  "special": true
266
  },
267
  "128033": {
268
- "content": "<|reserved_special_token_28|>",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
@@ -273,7 +273,7 @@
273
  "special": true
274
  },
275
  "128034": {
276
- "content": "<|reserved_special_token_29|>",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
@@ -281,7 +281,7 @@
281
  "special": true
282
  },
283
  "128035": {
284
- "content": "<|reserved_special_token_30|>",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  "special": true
290
  },
291
  "128036": {
292
- "content": "<|reserved_special_token_31|>",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
@@ -297,7 +297,7 @@
297
  "special": true
298
  },
299
  "128037": {
300
- "content": "<|reserved_special_token_32|>",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
@@ -305,7 +305,7 @@
305
  "special": true
306
  },
307
  "128038": {
308
- "content": "<|reserved_special_token_33|>",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
@@ -313,7 +313,7 @@
313
  "special": true
314
  },
315
  "128039": {
316
- "content": "<|reserved_special_token_34|>",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
@@ -321,7 +321,7 @@
321
  "special": true
322
  },
323
  "128040": {
324
- "content": "<|reserved_special_token_35|>",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
@@ -329,7 +329,7 @@
329
  "special": true
330
  },
331
  "128041": {
332
- "content": "<|reserved_special_token_36|>",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
@@ -337,7 +337,7 @@
337
  "special": true
338
  },
339
  "128042": {
340
- "content": "<|reserved_special_token_37|>",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
@@ -345,7 +345,7 @@
345
  "special": true
346
  },
347
  "128043": {
348
- "content": "<|reserved_special_token_38|>",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
@@ -353,7 +353,7 @@
353
  "special": true
354
  },
355
  "128044": {
356
- "content": "<|reserved_special_token_39|>",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
@@ -361,7 +361,7 @@
361
  "special": true
362
  },
363
  "128045": {
364
- "content": "<|reserved_special_token_40|>",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
@@ -369,7 +369,7 @@
369
  "special": true
370
  },
371
  "128046": {
372
- "content": "<|reserved_special_token_41|>",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
@@ -377,7 +377,7 @@
377
  "special": true
378
  },
379
  "128047": {
380
- "content": "<|reserved_special_token_42|>",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
@@ -385,7 +385,7 @@
385
  "special": true
386
  },
387
  "128048": {
388
- "content": "<|reserved_special_token_43|>",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
@@ -393,7 +393,7 @@
393
  "special": true
394
  },
395
  "128049": {
396
- "content": "<|reserved_special_token_44|>",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
@@ -401,7 +401,7 @@
401
  "special": true
402
  },
403
  "128050": {
404
- "content": "<|reserved_special_token_45|>",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
@@ -409,7 +409,7 @@
409
  "special": true
410
  },
411
  "128051": {
412
- "content": "<|reserved_special_token_46|>",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
@@ -417,7 +417,7 @@
417
  "special": true
418
  },
419
  "128052": {
420
- "content": "<|reserved_special_token_47|>",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
@@ -425,7 +425,7 @@
425
  "special": true
426
  },
427
  "128053": {
428
- "content": "<|reserved_special_token_48|>",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
@@ -433,7 +433,7 @@
433
  "special": true
434
  },
435
  "128054": {
436
- "content": "<|reserved_special_token_49|>",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
@@ -441,7 +441,7 @@
441
  "special": true
442
  },
443
  "128055": {
444
- "content": "<|reserved_special_token_50|>",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
@@ -449,7 +449,7 @@
449
  "special": true
450
  },
451
  "128056": {
452
- "content": "<|reserved_special_token_51|>",
453
  "lstrip": false,
454
  "normalized": false,
455
  "rstrip": false,
@@ -457,7 +457,7 @@
457
  "special": true
458
  },
459
  "128057": {
460
- "content": "<|reserved_special_token_52|>",
461
  "lstrip": false,
462
  "normalized": false,
463
  "rstrip": false,
@@ -465,7 +465,7 @@
465
  "special": true
466
  },
467
  "128058": {
468
- "content": "<|reserved_special_token_53|>",
469
  "lstrip": false,
470
  "normalized": false,
471
  "rstrip": false,
@@ -473,7 +473,7 @@
473
  "special": true
474
  },
475
  "128059": {
476
- "content": "<|reserved_special_token_54|>",
477
  "lstrip": false,
478
  "normalized": false,
479
  "rstrip": false,
@@ -481,7 +481,7 @@
481
  "special": true
482
  },
483
  "128060": {
484
- "content": "<|reserved_special_token_55|>",
485
  "lstrip": false,
486
  "normalized": false,
487
  "rstrip": false,
@@ -489,7 +489,7 @@
489
  "special": true
490
  },
491
  "128061": {
492
- "content": "<|reserved_special_token_56|>",
493
  "lstrip": false,
494
  "normalized": false,
495
  "rstrip": false,
@@ -497,7 +497,7 @@
497
  "special": true
498
  },
499
  "128062": {
500
- "content": "<|reserved_special_token_57|>",
501
  "lstrip": false,
502
  "normalized": false,
503
  "rstrip": false,
@@ -505,7 +505,7 @@
505
  "special": true
506
  },
507
  "128063": {
508
- "content": "<|reserved_special_token_58|>",
509
  "lstrip": false,
510
  "normalized": false,
511
  "rstrip": false,
@@ -513,7 +513,7 @@
513
  "special": true
514
  },
515
  "128064": {
516
- "content": "<|reserved_special_token_59|>",
517
  "lstrip": false,
518
  "normalized": false,
519
  "rstrip": false,
@@ -521,7 +521,7 @@
521
  "special": true
522
  },
523
  "128065": {
524
- "content": "<|reserved_special_token_60|>",
525
  "lstrip": false,
526
  "normalized": false,
527
  "rstrip": false,
@@ -529,7 +529,7 @@
529
  "special": true
530
  },
531
  "128066": {
532
- "content": "<|reserved_special_token_61|>",
533
  "lstrip": false,
534
  "normalized": false,
535
  "rstrip": false,
@@ -537,7 +537,7 @@
537
  "special": true
538
  },
539
  "128067": {
540
- "content": "<|reserved_special_token_62|>",
541
  "lstrip": false,
542
  "normalized": false,
543
  "rstrip": false,
@@ -545,7 +545,7 @@
545
  "special": true
546
  },
547
  "128068": {
548
- "content": "<|reserved_special_token_63|>",
549
  "lstrip": false,
550
  "normalized": false,
551
  "rstrip": false,
@@ -553,7 +553,7 @@
553
  "special": true
554
  },
555
  "128069": {
556
- "content": "<|reserved_special_token_64|>",
557
  "lstrip": false,
558
  "normalized": false,
559
  "rstrip": false,
@@ -561,7 +561,7 @@
561
  "special": true
562
  },
563
  "128070": {
564
- "content": "<|reserved_special_token_65|>",
565
  "lstrip": false,
566
  "normalized": false,
567
  "rstrip": false,
@@ -569,7 +569,7 @@
569
  "special": true
570
  },
571
  "128071": {
572
- "content": "<|reserved_special_token_66|>",
573
  "lstrip": false,
574
  "normalized": false,
575
  "rstrip": false,
@@ -577,7 +577,7 @@
577
  "special": true
578
  },
579
  "128072": {
580
- "content": "<|reserved_special_token_67|>",
581
  "lstrip": false,
582
  "normalized": false,
583
  "rstrip": false,
@@ -585,7 +585,7 @@
585
  "special": true
586
  },
587
  "128073": {
588
- "content": "<|reserved_special_token_68|>",
589
  "lstrip": false,
590
  "normalized": false,
591
  "rstrip": false,
@@ -593,7 +593,7 @@
593
  "special": true
594
  },
595
  "128074": {
596
- "content": "<|reserved_special_token_69|>",
597
  "lstrip": false,
598
  "normalized": false,
599
  "rstrip": false,
@@ -601,7 +601,7 @@
601
  "special": true
602
  },
603
  "128075": {
604
- "content": "<|reserved_special_token_70|>",
605
  "lstrip": false,
606
  "normalized": false,
607
  "rstrip": false,
@@ -609,7 +609,7 @@
609
  "special": true
610
  },
611
  "128076": {
612
- "content": "<|reserved_special_token_71|>",
613
  "lstrip": false,
614
  "normalized": false,
615
  "rstrip": false,
@@ -617,7 +617,7 @@
617
  "special": true
618
  },
619
  "128077": {
620
- "content": "<|reserved_special_token_72|>",
621
  "lstrip": false,
622
  "normalized": false,
623
  "rstrip": false,
@@ -625,7 +625,7 @@
625
  "special": true
626
  },
627
  "128078": {
628
- "content": "<|reserved_special_token_73|>",
629
  "lstrip": false,
630
  "normalized": false,
631
  "rstrip": false,
@@ -633,7 +633,7 @@
633
  "special": true
634
  },
635
  "128079": {
636
- "content": "<|reserved_special_token_74|>",
637
  "lstrip": false,
638
  "normalized": false,
639
  "rstrip": false,
@@ -641,7 +641,7 @@
641
  "special": true
642
  },
643
  "128080": {
644
- "content": "<|reserved_special_token_75|>",
645
  "lstrip": false,
646
  "normalized": false,
647
  "rstrip": false,
@@ -649,7 +649,7 @@
649
  "special": true
650
  },
651
  "128081": {
652
- "content": "<|reserved_special_token_76|>",
653
  "lstrip": false,
654
  "normalized": false,
655
  "rstrip": false,
@@ -657,7 +657,7 @@
657
  "special": true
658
  },
659
  "128082": {
660
- "content": "<|reserved_special_token_77|>",
661
  "lstrip": false,
662
  "normalized": false,
663
  "rstrip": false,
@@ -665,7 +665,7 @@
665
  "special": true
666
  },
667
  "128083": {
668
- "content": "<|reserved_special_token_78|>",
669
  "lstrip": false,
670
  "normalized": false,
671
  "rstrip": false,
@@ -673,7 +673,7 @@
673
  "special": true
674
  },
675
  "128084": {
676
- "content": "<|reserved_special_token_79|>",
677
  "lstrip": false,
678
  "normalized": false,
679
  "rstrip": false,
@@ -681,7 +681,7 @@
681
  "special": true
682
  },
683
  "128085": {
684
- "content": "<|reserved_special_token_80|>",
685
  "lstrip": false,
686
  "normalized": false,
687
  "rstrip": false,
@@ -689,7 +689,7 @@
689
  "special": true
690
  },
691
  "128086": {
692
- "content": "<|reserved_special_token_81|>",
693
  "lstrip": false,
694
  "normalized": false,
695
  "rstrip": false,
@@ -697,7 +697,7 @@
697
  "special": true
698
  },
699
  "128087": {
700
- "content": "<|reserved_special_token_82|>",
701
  "lstrip": false,
702
  "normalized": false,
703
  "rstrip": false,
@@ -705,7 +705,7 @@
705
  "special": true
706
  },
707
  "128088": {
708
- "content": "<|reserved_special_token_83|>",
709
  "lstrip": false,
710
  "normalized": false,
711
  "rstrip": false,
@@ -713,7 +713,7 @@
713
  "special": true
714
  },
715
  "128089": {
716
- "content": "<|reserved_special_token_84|>",
717
  "lstrip": false,
718
  "normalized": false,
719
  "rstrip": false,
@@ -721,7 +721,7 @@
721
  "special": true
722
  },
723
  "128090": {
724
- "content": "<|reserved_special_token_85|>",
725
  "lstrip": false,
726
  "normalized": false,
727
  "rstrip": false,
@@ -729,7 +729,7 @@
729
  "special": true
730
  },
731
  "128091": {
732
- "content": "<|reserved_special_token_86|>",
733
  "lstrip": false,
734
  "normalized": false,
735
  "rstrip": false,
@@ -737,7 +737,7 @@
737
  "special": true
738
  },
739
  "128092": {
740
- "content": "<|reserved_special_token_87|>",
741
  "lstrip": false,
742
  "normalized": false,
743
  "rstrip": false,
@@ -745,7 +745,7 @@
745
  "special": true
746
  },
747
  "128093": {
748
- "content": "<|reserved_special_token_88|>",
749
  "lstrip": false,
750
  "normalized": false,
751
  "rstrip": false,
@@ -753,7 +753,7 @@
753
  "special": true
754
  },
755
  "128094": {
756
- "content": "<|reserved_special_token_89|>",
757
  "lstrip": false,
758
  "normalized": false,
759
  "rstrip": false,
@@ -761,7 +761,7 @@
761
  "special": true
762
  },
763
  "128095": {
764
- "content": "<|reserved_special_token_90|>",
765
  "lstrip": false,
766
  "normalized": false,
767
  "rstrip": false,
@@ -769,7 +769,7 @@
769
  "special": true
770
  },
771
  "128096": {
772
- "content": "<|reserved_special_token_91|>",
773
  "lstrip": false,
774
  "normalized": false,
775
  "rstrip": false,
@@ -777,7 +777,7 @@
777
  "special": true
778
  },
779
  "128097": {
780
- "content": "<|reserved_special_token_92|>",
781
  "lstrip": false,
782
  "normalized": false,
783
  "rstrip": false,
@@ -785,7 +785,7 @@
785
  "special": true
786
  },
787
  "128098": {
788
- "content": "<|reserved_special_token_93|>",
789
  "lstrip": false,
790
  "normalized": false,
791
  "rstrip": false,
@@ -793,7 +793,7 @@
793
  "special": true
794
  },
795
  "128099": {
796
- "content": "<|reserved_special_token_94|>",
797
  "lstrip": false,
798
  "normalized": false,
799
  "rstrip": false,
@@ -801,7 +801,7 @@
801
  "special": true
802
  },
803
  "128100": {
804
- "content": "<|reserved_special_token_95|>",
805
  "lstrip": false,
806
  "normalized": false,
807
  "rstrip": false,
@@ -809,7 +809,7 @@
809
  "special": true
810
  },
811
  "128101": {
812
- "content": "<|reserved_special_token_96|>",
813
  "lstrip": false,
814
  "normalized": false,
815
  "rstrip": false,
@@ -817,7 +817,7 @@
817
  "special": true
818
  },
819
  "128102": {
820
- "content": "<|reserved_special_token_97|>",
821
  "lstrip": false,
822
  "normalized": false,
823
  "rstrip": false,
@@ -825,7 +825,7 @@
825
  "special": true
826
  },
827
  "128103": {
828
- "content": "<|reserved_special_token_98|>",
829
  "lstrip": false,
830
  "normalized": false,
831
  "rstrip": false,
@@ -833,7 +833,7 @@
833
  "special": true
834
  },
835
  "128104": {
836
- "content": "<|reserved_special_token_99|>",
837
  "lstrip": false,
838
  "normalized": false,
839
  "rstrip": false,
@@ -841,7 +841,7 @@
841
  "special": true
842
  },
843
  "128105": {
844
- "content": "<|reserved_special_token_100|>",
845
  "lstrip": false,
846
  "normalized": false,
847
  "rstrip": false,
@@ -849,7 +849,7 @@
849
  "special": true
850
  },
851
  "128106": {
852
- "content": "<|reserved_special_token_101|>",
853
  "lstrip": false,
854
  "normalized": false,
855
  "rstrip": false,
@@ -857,7 +857,7 @@
857
  "special": true
858
  },
859
  "128107": {
860
- "content": "<|reserved_special_token_102|>",
861
  "lstrip": false,
862
  "normalized": false,
863
  "rstrip": false,
@@ -865,7 +865,7 @@
865
  "special": true
866
  },
867
  "128108": {
868
- "content": "<|reserved_special_token_103|>",
869
  "lstrip": false,
870
  "normalized": false,
871
  "rstrip": false,
@@ -873,7 +873,7 @@
873
  "special": true
874
  },
875
  "128109": {
876
- "content": "<|reserved_special_token_104|>",
877
  "lstrip": false,
878
  "normalized": false,
879
  "rstrip": false,
@@ -881,7 +881,7 @@
881
  "special": true
882
  },
883
  "128110": {
884
- "content": "<|reserved_special_token_105|>",
885
  "lstrip": false,
886
  "normalized": false,
887
  "rstrip": false,
@@ -889,7 +889,7 @@
889
  "special": true
890
  },
891
  "128111": {
892
- "content": "<|reserved_special_token_106|>",
893
  "lstrip": false,
894
  "normalized": false,
895
  "rstrip": false,
@@ -897,7 +897,7 @@
897
  "special": true
898
  },
899
  "128112": {
900
- "content": "<|reserved_special_token_107|>",
901
  "lstrip": false,
902
  "normalized": false,
903
  "rstrip": false,
@@ -905,7 +905,7 @@
905
  "special": true
906
  },
907
  "128113": {
908
- "content": "<|reserved_special_token_108|>",
909
  "lstrip": false,
910
  "normalized": false,
911
  "rstrip": false,
@@ -913,7 +913,7 @@
913
  "special": true
914
  },
915
  "128114": {
916
- "content": "<|reserved_special_token_109|>",
917
  "lstrip": false,
918
  "normalized": false,
919
  "rstrip": false,
@@ -921,7 +921,7 @@
921
  "special": true
922
  },
923
  "128115": {
924
- "content": "<|reserved_special_token_110|>",
925
  "lstrip": false,
926
  "normalized": false,
927
  "rstrip": false,
@@ -929,7 +929,7 @@
929
  "special": true
930
  },
931
  "128116": {
932
- "content": "<|reserved_special_token_111|>",
933
  "lstrip": false,
934
  "normalized": false,
935
  "rstrip": false,
@@ -937,7 +937,7 @@
937
  "special": true
938
  },
939
  "128117": {
940
- "content": "<|reserved_special_token_112|>",
941
  "lstrip": false,
942
  "normalized": false,
943
  "rstrip": false,
@@ -945,7 +945,7 @@
945
  "special": true
946
  },
947
  "128118": {
948
- "content": "<|reserved_special_token_113|>",
949
  "lstrip": false,
950
  "normalized": false,
951
  "rstrip": false,
@@ -953,7 +953,7 @@
953
  "special": true
954
  },
955
  "128119": {
956
- "content": "<|reserved_special_token_114|>",
957
  "lstrip": false,
958
  "normalized": false,
959
  "rstrip": false,
@@ -961,7 +961,7 @@
961
  "special": true
962
  },
963
  "128120": {
964
- "content": "<|reserved_special_token_115|>",
965
  "lstrip": false,
966
  "normalized": false,
967
  "rstrip": false,
@@ -969,7 +969,7 @@
969
  "special": true
970
  },
971
  "128121": {
972
- "content": "<|reserved_special_token_116|>",
973
  "lstrip": false,
974
  "normalized": false,
975
  "rstrip": false,
@@ -977,7 +977,7 @@
977
  "special": true
978
  },
979
  "128122": {
980
- "content": "<|reserved_special_token_117|>",
981
  "lstrip": false,
982
  "normalized": false,
983
  "rstrip": false,
@@ -985,7 +985,7 @@
985
  "special": true
986
  },
987
  "128123": {
988
- "content": "<|reserved_special_token_118|>",
989
  "lstrip": false,
990
  "normalized": false,
991
  "rstrip": false,
@@ -993,7 +993,7 @@
993
  "special": true
994
  },
995
  "128124": {
996
- "content": "<|reserved_special_token_119|>",
997
  "lstrip": false,
998
  "normalized": false,
999
  "rstrip": false,
@@ -1001,7 +1001,7 @@
1001
  "special": true
1002
  },
1003
  "128125": {
1004
- "content": "<|reserved_special_token_120|>",
1005
  "lstrip": false,
1006
  "normalized": false,
1007
  "rstrip": false,
@@ -1009,7 +1009,7 @@
1009
  "special": true
1010
  },
1011
  "128126": {
1012
- "content": "<|reserved_special_token_121|>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
@@ -1017,7 +1017,7 @@
1017
  "special": true
1018
  },
1019
  "128127": {
1020
- "content": "<|reserved_special_token_122|>",
1021
  "lstrip": false,
1022
  "normalized": false,
1023
  "rstrip": false,
@@ -1025,7 +1025,7 @@
1025
  "special": true
1026
  },
1027
  "128128": {
1028
- "content": "<|reserved_special_token_123|>",
1029
  "lstrip": false,
1030
  "normalized": false,
1031
  "rstrip": false,
@@ -1033,7 +1033,7 @@
1033
  "special": true
1034
  },
1035
  "128129": {
1036
- "content": "<|reserved_special_token_124|>",
1037
  "lstrip": false,
1038
  "normalized": false,
1039
  "rstrip": false,
@@ -1041,7 +1041,7 @@
1041
  "special": true
1042
  },
1043
  "128130": {
1044
- "content": "<|reserved_special_token_125|>",
1045
  "lstrip": false,
1046
  "normalized": false,
1047
  "rstrip": false,
@@ -1049,7 +1049,7 @@
1049
  "special": true
1050
  },
1051
  "128131": {
1052
- "content": "<|reserved_special_token_126|>",
1053
  "lstrip": false,
1054
  "normalized": false,
1055
  "rstrip": false,
@@ -1057,7 +1057,7 @@
1057
  "special": true
1058
  },
1059
  "128132": {
1060
- "content": "<|reserved_special_token_127|>",
1061
  "lstrip": false,
1062
  "normalized": false,
1063
  "rstrip": false,
@@ -1065,7 +1065,7 @@
1065
  "special": true
1066
  },
1067
  "128133": {
1068
- "content": "<|reserved_special_token_128|>",
1069
  "lstrip": false,
1070
  "normalized": false,
1071
  "rstrip": false,
@@ -1073,7 +1073,7 @@
1073
  "special": true
1074
  },
1075
  "128134": {
1076
- "content": "<|reserved_special_token_129|>",
1077
  "lstrip": false,
1078
  "normalized": false,
1079
  "rstrip": false,
@@ -1081,7 +1081,7 @@
1081
  "special": true
1082
  },
1083
  "128135": {
1084
- "content": "<|reserved_special_token_130|>",
1085
  "lstrip": false,
1086
  "normalized": false,
1087
  "rstrip": false,
@@ -1089,7 +1089,7 @@
1089
  "special": true
1090
  },
1091
  "128136": {
1092
- "content": "<|reserved_special_token_131|>",
1093
  "lstrip": false,
1094
  "normalized": false,
1095
  "rstrip": false,
@@ -1097,7 +1097,7 @@
1097
  "special": true
1098
  },
1099
  "128137": {
1100
- "content": "<|reserved_special_token_132|>",
1101
  "lstrip": false,
1102
  "normalized": false,
1103
  "rstrip": false,
@@ -1105,7 +1105,7 @@
1105
  "special": true
1106
  },
1107
  "128138": {
1108
- "content": "<|reserved_special_token_133|>",
1109
  "lstrip": false,
1110
  "normalized": false,
1111
  "rstrip": false,
@@ -1113,7 +1113,7 @@
1113
  "special": true
1114
  },
1115
  "128139": {
1116
- "content": "<|reserved_special_token_134|>",
1117
  "lstrip": false,
1118
  "normalized": false,
1119
  "rstrip": false,
@@ -1121,7 +1121,7 @@
1121
  "special": true
1122
  },
1123
  "128140": {
1124
- "content": "<|reserved_special_token_135|>",
1125
  "lstrip": false,
1126
  "normalized": false,
1127
  "rstrip": false,
@@ -1129,7 +1129,7 @@
1129
  "special": true
1130
  },
1131
  "128141": {
1132
- "content": "<|reserved_special_token_136|>",
1133
  "lstrip": false,
1134
  "normalized": false,
1135
  "rstrip": false,
@@ -1137,7 +1137,7 @@
1137
  "special": true
1138
  },
1139
  "128142": {
1140
- "content": "<|reserved_special_token_137|>",
1141
  "lstrip": false,
1142
  "normalized": false,
1143
  "rstrip": false,
@@ -1145,7 +1145,7 @@
1145
  "special": true
1146
  },
1147
  "128143": {
1148
- "content": "<|reserved_special_token_138|>",
1149
  "lstrip": false,
1150
  "normalized": false,
1151
  "rstrip": false,
@@ -1153,7 +1153,7 @@
1153
  "special": true
1154
  },
1155
  "128144": {
1156
- "content": "<|reserved_special_token_139|>",
1157
  "lstrip": false,
1158
  "normalized": false,
1159
  "rstrip": false,
@@ -1161,7 +1161,7 @@
1161
  "special": true
1162
  },
1163
  "128145": {
1164
- "content": "<|reserved_special_token_140|>",
1165
  "lstrip": false,
1166
  "normalized": false,
1167
  "rstrip": false,
@@ -1169,7 +1169,7 @@
1169
  "special": true
1170
  },
1171
  "128146": {
1172
- "content": "<|reserved_special_token_141|>",
1173
  "lstrip": false,
1174
  "normalized": false,
1175
  "rstrip": false,
@@ -1177,7 +1177,7 @@
1177
  "special": true
1178
  },
1179
  "128147": {
1180
- "content": "<|reserved_special_token_142|>",
1181
  "lstrip": false,
1182
  "normalized": false,
1183
  "rstrip": false,
@@ -1185,7 +1185,7 @@
1185
  "special": true
1186
  },
1187
  "128148": {
1188
- "content": "<|reserved_special_token_143|>",
1189
  "lstrip": false,
1190
  "normalized": false,
1191
  "rstrip": false,
@@ -1193,7 +1193,7 @@
1193
  "special": true
1194
  },
1195
  "128149": {
1196
- "content": "<|reserved_special_token_144|>",
1197
  "lstrip": false,
1198
  "normalized": false,
1199
  "rstrip": false,
@@ -1201,7 +1201,7 @@
1201
  "special": true
1202
  },
1203
  "128150": {
1204
- "content": "<|reserved_special_token_145|>",
1205
  "lstrip": false,
1206
  "normalized": false,
1207
  "rstrip": false,
@@ -1209,7 +1209,7 @@
1209
  "special": true
1210
  },
1211
  "128151": {
1212
- "content": "<|reserved_special_token_146|>",
1213
  "lstrip": false,
1214
  "normalized": false,
1215
  "rstrip": false,
@@ -1217,7 +1217,7 @@
1217
  "special": true
1218
  },
1219
  "128152": {
1220
- "content": "<|reserved_special_token_147|>",
1221
  "lstrip": false,
1222
  "normalized": false,
1223
  "rstrip": false,
@@ -1225,7 +1225,7 @@
1225
  "special": true
1226
  },
1227
  "128153": {
1228
- "content": "<|reserved_special_token_148|>",
1229
  "lstrip": false,
1230
  "normalized": false,
1231
  "rstrip": false,
@@ -1233,7 +1233,7 @@
1233
  "special": true
1234
  },
1235
  "128154": {
1236
- "content": "<|reserved_special_token_149|>",
1237
  "lstrip": false,
1238
  "normalized": false,
1239
  "rstrip": false,
@@ -1241,7 +1241,7 @@
1241
  "special": true
1242
  },
1243
  "128155": {
1244
- "content": "<|reserved_special_token_150|>",
1245
  "lstrip": false,
1246
  "normalized": false,
1247
  "rstrip": false,
@@ -1249,7 +1249,7 @@
1249
  "special": true
1250
  },
1251
  "128156": {
1252
- "content": "<|reserved_special_token_151|>",
1253
  "lstrip": false,
1254
  "normalized": false,
1255
  "rstrip": false,
@@ -1257,7 +1257,7 @@
1257
  "special": true
1258
  },
1259
  "128157": {
1260
- "content": "<|reserved_special_token_152|>",
1261
  "lstrip": false,
1262
  "normalized": false,
1263
  "rstrip": false,
@@ -1265,7 +1265,7 @@
1265
  "special": true
1266
  },
1267
  "128158": {
1268
- "content": "<|reserved_special_token_153|>",
1269
  "lstrip": false,
1270
  "normalized": false,
1271
  "rstrip": false,
@@ -1273,7 +1273,7 @@
1273
  "special": true
1274
  },
1275
  "128159": {
1276
- "content": "<|reserved_special_token_154|>",
1277
  "lstrip": false,
1278
  "normalized": false,
1279
  "rstrip": false,
@@ -1281,7 +1281,7 @@
1281
  "special": true
1282
  },
1283
  "128160": {
1284
- "content": "<|reserved_special_token_155|>",
1285
  "lstrip": false,
1286
  "normalized": false,
1287
  "rstrip": false,
@@ -1289,7 +1289,7 @@
1289
  "special": true
1290
  },
1291
  "128161": {
1292
- "content": "<|reserved_special_token_156|>",
1293
  "lstrip": false,
1294
  "normalized": false,
1295
  "rstrip": false,
@@ -1297,7 +1297,7 @@
1297
  "special": true
1298
  },
1299
  "128162": {
1300
- "content": "<|reserved_special_token_157|>",
1301
  "lstrip": false,
1302
  "normalized": false,
1303
  "rstrip": false,
@@ -1305,7 +1305,7 @@
1305
  "special": true
1306
  },
1307
  "128163": {
1308
- "content": "<|reserved_special_token_158|>",
1309
  "lstrip": false,
1310
  "normalized": false,
1311
  "rstrip": false,
@@ -1313,7 +1313,7 @@
1313
  "special": true
1314
  },
1315
  "128164": {
1316
- "content": "<|reserved_special_token_159|>",
1317
  "lstrip": false,
1318
  "normalized": false,
1319
  "rstrip": false,
@@ -1321,7 +1321,7 @@
1321
  "special": true
1322
  },
1323
  "128165": {
1324
- "content": "<|reserved_special_token_160|>",
1325
  "lstrip": false,
1326
  "normalized": false,
1327
  "rstrip": false,
@@ -1329,7 +1329,7 @@
1329
  "special": true
1330
  },
1331
  "128166": {
1332
- "content": "<|reserved_special_token_161|>",
1333
  "lstrip": false,
1334
  "normalized": false,
1335
  "rstrip": false,
@@ -1337,7 +1337,7 @@
1337
  "special": true
1338
  },
1339
  "128167": {
1340
- "content": "<|reserved_special_token_162|>",
1341
  "lstrip": false,
1342
  "normalized": false,
1343
  "rstrip": false,
@@ -1345,7 +1345,7 @@
1345
  "special": true
1346
  },
1347
  "128168": {
1348
- "content": "<|reserved_special_token_163|>",
1349
  "lstrip": false,
1350
  "normalized": false,
1351
  "rstrip": false,
@@ -1353,7 +1353,7 @@
1353
  "special": true
1354
  },
1355
  "128169": {
1356
- "content": "<|reserved_special_token_164|>",
1357
  "lstrip": false,
1358
  "normalized": false,
1359
  "rstrip": false,
@@ -1361,7 +1361,7 @@
1361
  "special": true
1362
  },
1363
  "128170": {
1364
- "content": "<|reserved_special_token_165|>",
1365
  "lstrip": false,
1366
  "normalized": false,
1367
  "rstrip": false,
@@ -1369,7 +1369,7 @@
1369
  "special": true
1370
  },
1371
  "128171": {
1372
- "content": "<|reserved_special_token_166|>",
1373
  "lstrip": false,
1374
  "normalized": false,
1375
  "rstrip": false,
@@ -1377,7 +1377,7 @@
1377
  "special": true
1378
  },
1379
  "128172": {
1380
- "content": "<|reserved_special_token_167|>",
1381
  "lstrip": false,
1382
  "normalized": false,
1383
  "rstrip": false,
@@ -1385,7 +1385,7 @@
1385
  "special": true
1386
  },
1387
  "128173": {
1388
- "content": "<|reserved_special_token_168|>",
1389
  "lstrip": false,
1390
  "normalized": false,
1391
  "rstrip": false,
@@ -1393,7 +1393,7 @@
1393
  "special": true
1394
  },
1395
  "128174": {
1396
- "content": "<|reserved_special_token_169|>",
1397
  "lstrip": false,
1398
  "normalized": false,
1399
  "rstrip": false,
@@ -1401,7 +1401,7 @@
1401
  "special": true
1402
  },
1403
  "128175": {
1404
- "content": "<|reserved_special_token_170|>",
1405
  "lstrip": false,
1406
  "normalized": false,
1407
  "rstrip": false,
@@ -1409,7 +1409,7 @@
1409
  "special": true
1410
  },
1411
  "128176": {
1412
- "content": "<|reserved_special_token_171|>",
1413
  "lstrip": false,
1414
  "normalized": false,
1415
  "rstrip": false,
@@ -1417,7 +1417,7 @@
1417
  "special": true
1418
  },
1419
  "128177": {
1420
- "content": "<|reserved_special_token_172|>",
1421
  "lstrip": false,
1422
  "normalized": false,
1423
  "rstrip": false,
@@ -1425,7 +1425,7 @@
1425
  "special": true
1426
  },
1427
  "128178": {
1428
- "content": "<|reserved_special_token_173|>",
1429
  "lstrip": false,
1430
  "normalized": false,
1431
  "rstrip": false,
@@ -1433,7 +1433,7 @@
1433
  "special": true
1434
  },
1435
  "128179": {
1436
- "content": "<|reserved_special_token_174|>",
1437
  "lstrip": false,
1438
  "normalized": false,
1439
  "rstrip": false,
@@ -1441,7 +1441,7 @@
1441
  "special": true
1442
  },
1443
  "128180": {
1444
- "content": "<|reserved_special_token_175|>",
1445
  "lstrip": false,
1446
  "normalized": false,
1447
  "rstrip": false,
@@ -1449,7 +1449,7 @@
1449
  "special": true
1450
  },
1451
  "128181": {
1452
- "content": "<|reserved_special_token_176|>",
1453
  "lstrip": false,
1454
  "normalized": false,
1455
  "rstrip": false,
@@ -1457,7 +1457,7 @@
1457
  "special": true
1458
  },
1459
  "128182": {
1460
- "content": "<|reserved_special_token_177|>",
1461
  "lstrip": false,
1462
  "normalized": false,
1463
  "rstrip": false,
@@ -1465,7 +1465,7 @@
1465
  "special": true
1466
  },
1467
  "128183": {
1468
- "content": "<|reserved_special_token_178|>",
1469
  "lstrip": false,
1470
  "normalized": false,
1471
  "rstrip": false,
@@ -1473,7 +1473,7 @@
1473
  "special": true
1474
  },
1475
  "128184": {
1476
- "content": "<|reserved_special_token_179|>",
1477
  "lstrip": false,
1478
  "normalized": false,
1479
  "rstrip": false,
@@ -1481,7 +1481,7 @@
1481
  "special": true
1482
  },
1483
  "128185": {
1484
- "content": "<|reserved_special_token_180|>",
1485
  "lstrip": false,
1486
  "normalized": false,
1487
  "rstrip": false,
@@ -1489,7 +1489,7 @@
1489
  "special": true
1490
  },
1491
  "128186": {
1492
- "content": "<|reserved_special_token_181|>",
1493
  "lstrip": false,
1494
  "normalized": false,
1495
  "rstrip": false,
@@ -1497,7 +1497,7 @@
1497
  "special": true
1498
  },
1499
  "128187": {
1500
- "content": "<|reserved_special_token_182|>",
1501
  "lstrip": false,
1502
  "normalized": false,
1503
  "rstrip": false,
@@ -1505,7 +1505,7 @@
1505
  "special": true
1506
  },
1507
  "128188": {
1508
- "content": "<|reserved_special_token_183|>",
1509
  "lstrip": false,
1510
  "normalized": false,
1511
  "rstrip": false,
@@ -1513,7 +1513,7 @@
1513
  "special": true
1514
  },
1515
  "128189": {
1516
- "content": "<|reserved_special_token_184|>",
1517
  "lstrip": false,
1518
  "normalized": false,
1519
  "rstrip": false,
@@ -1521,7 +1521,7 @@
1521
  "special": true
1522
  },
1523
  "128190": {
1524
- "content": "<|reserved_special_token_185|>",
1525
  "lstrip": false,
1526
  "normalized": false,
1527
  "rstrip": false,
@@ -1529,7 +1529,7 @@
1529
  "special": true
1530
  },
1531
  "128191": {
1532
- "content": "<|reserved_special_token_186|>",
1533
  "lstrip": false,
1534
  "normalized": false,
1535
  "rstrip": false,
@@ -1537,7 +1537,7 @@
1537
  "special": true
1538
  },
1539
  "128192": {
1540
- "content": "<|reserved_special_token_187|>",
1541
  "lstrip": false,
1542
  "normalized": false,
1543
  "rstrip": false,
@@ -1545,7 +1545,7 @@
1545
  "special": true
1546
  },
1547
  "128193": {
1548
- "content": "<|reserved_special_token_188|>",
1549
  "lstrip": false,
1550
  "normalized": false,
1551
  "rstrip": false,
@@ -1553,7 +1553,7 @@
1553
  "special": true
1554
  },
1555
  "128194": {
1556
- "content": "<|reserved_special_token_189|>",
1557
  "lstrip": false,
1558
  "normalized": false,
1559
  "rstrip": false,
@@ -1561,7 +1561,7 @@
1561
  "special": true
1562
  },
1563
  "128195": {
1564
- "content": "<|reserved_special_token_190|>",
1565
  "lstrip": false,
1566
  "normalized": false,
1567
  "rstrip": false,
@@ -1569,7 +1569,7 @@
1569
  "special": true
1570
  },
1571
  "128196": {
1572
- "content": "<|reserved_special_token_191|>",
1573
  "lstrip": false,
1574
  "normalized": false,
1575
  "rstrip": false,
@@ -1577,7 +1577,7 @@
1577
  "special": true
1578
  },
1579
  "128197": {
1580
- "content": "<|reserved_special_token_192|>",
1581
  "lstrip": false,
1582
  "normalized": false,
1583
  "rstrip": false,
@@ -1585,7 +1585,7 @@
1585
  "special": true
1586
  },
1587
  "128198": {
1588
- "content": "<|reserved_special_token_193|>",
1589
  "lstrip": false,
1590
  "normalized": false,
1591
  "rstrip": false,
@@ -1593,7 +1593,7 @@
1593
  "special": true
1594
  },
1595
  "128199": {
1596
- "content": "<|reserved_special_token_194|>",
1597
  "lstrip": false,
1598
  "normalized": false,
1599
  "rstrip": false,
@@ -1601,7 +1601,7 @@
1601
  "special": true
1602
  },
1603
  "128200": {
1604
- "content": "<|reserved_special_token_195|>",
1605
  "lstrip": false,
1606
  "normalized": false,
1607
  "rstrip": false,
@@ -1609,7 +1609,7 @@
1609
  "special": true
1610
  },
1611
  "128201": {
1612
- "content": "<|reserved_special_token_196|>",
1613
  "lstrip": false,
1614
  "normalized": false,
1615
  "rstrip": false,
@@ -1617,7 +1617,7 @@
1617
  "special": true
1618
  },
1619
  "128202": {
1620
- "content": "<|reserved_special_token_197|>",
1621
  "lstrip": false,
1622
  "normalized": false,
1623
  "rstrip": false,
@@ -1625,7 +1625,7 @@
1625
  "special": true
1626
  },
1627
  "128203": {
1628
- "content": "<|reserved_special_token_198|>",
1629
  "lstrip": false,
1630
  "normalized": false,
1631
  "rstrip": false,
@@ -1633,7 +1633,7 @@
1633
  "special": true
1634
  },
1635
  "128204": {
1636
- "content": "<|reserved_special_token_199|>",
1637
  "lstrip": false,
1638
  "normalized": false,
1639
  "rstrip": false,
@@ -1641,7 +1641,7 @@
1641
  "special": true
1642
  },
1643
  "128205": {
1644
- "content": "<|reserved_special_token_200|>",
1645
  "lstrip": false,
1646
  "normalized": false,
1647
  "rstrip": false,
@@ -1649,7 +1649,7 @@
1649
  "special": true
1650
  },
1651
  "128206": {
1652
- "content": "<|reserved_special_token_201|>",
1653
  "lstrip": false,
1654
  "normalized": false,
1655
  "rstrip": false,
@@ -1657,7 +1657,7 @@
1657
  "special": true
1658
  },
1659
  "128207": {
1660
- "content": "<|reserved_special_token_202|>",
1661
  "lstrip": false,
1662
  "normalized": false,
1663
  "rstrip": false,
@@ -1665,7 +1665,7 @@
1665
  "special": true
1666
  },
1667
  "128208": {
1668
- "content": "<|reserved_special_token_203|>",
1669
  "lstrip": false,
1670
  "normalized": false,
1671
  "rstrip": false,
@@ -1673,7 +1673,7 @@
1673
  "special": true
1674
  },
1675
  "128209": {
1676
- "content": "<|reserved_special_token_204|>",
1677
  "lstrip": false,
1678
  "normalized": false,
1679
  "rstrip": false,
@@ -1681,7 +1681,7 @@
1681
  "special": true
1682
  },
1683
  "128210": {
1684
- "content": "<|reserved_special_token_205|>",
1685
  "lstrip": false,
1686
  "normalized": false,
1687
  "rstrip": false,
@@ -1689,7 +1689,7 @@
1689
  "special": true
1690
  },
1691
  "128211": {
1692
- "content": "<|reserved_special_token_206|>",
1693
  "lstrip": false,
1694
  "normalized": false,
1695
  "rstrip": false,
@@ -1697,7 +1697,7 @@
1697
  "special": true
1698
  },
1699
  "128212": {
1700
- "content": "<|reserved_special_token_207|>",
1701
  "lstrip": false,
1702
  "normalized": false,
1703
  "rstrip": false,
@@ -1705,7 +1705,7 @@
1705
  "special": true
1706
  },
1707
  "128213": {
1708
- "content": "<|reserved_special_token_208|>",
1709
  "lstrip": false,
1710
  "normalized": false,
1711
  "rstrip": false,
@@ -1713,7 +1713,7 @@
1713
  "special": true
1714
  },
1715
  "128214": {
1716
- "content": "<|reserved_special_token_209|>",
1717
  "lstrip": false,
1718
  "normalized": false,
1719
  "rstrip": false,
@@ -1721,7 +1721,7 @@
1721
  "special": true
1722
  },
1723
  "128215": {
1724
- "content": "<|reserved_special_token_210|>",
1725
  "lstrip": false,
1726
  "normalized": false,
1727
  "rstrip": false,
@@ -1729,7 +1729,7 @@
1729
  "special": true
1730
  },
1731
  "128216": {
1732
- "content": "<|reserved_special_token_211|>",
1733
  "lstrip": false,
1734
  "normalized": false,
1735
  "rstrip": false,
@@ -1737,7 +1737,7 @@
1737
  "special": true
1738
  },
1739
  "128217": {
1740
- "content": "<|reserved_special_token_212|>",
1741
  "lstrip": false,
1742
  "normalized": false,
1743
  "rstrip": false,
@@ -1745,7 +1745,7 @@
1745
  "special": true
1746
  },
1747
  "128218": {
1748
- "content": "<|reserved_special_token_213|>",
1749
  "lstrip": false,
1750
  "normalized": false,
1751
  "rstrip": false,
@@ -1753,7 +1753,7 @@
1753
  "special": true
1754
  },
1755
  "128219": {
1756
- "content": "<|reserved_special_token_214|>",
1757
  "lstrip": false,
1758
  "normalized": false,
1759
  "rstrip": false,
@@ -1761,7 +1761,7 @@
1761
  "special": true
1762
  },
1763
  "128220": {
1764
- "content": "<|reserved_special_token_215|>",
1765
  "lstrip": false,
1766
  "normalized": false,
1767
  "rstrip": false,
@@ -1769,7 +1769,7 @@
1769
  "special": true
1770
  },
1771
  "128221": {
1772
- "content": "<|reserved_special_token_216|>",
1773
  "lstrip": false,
1774
  "normalized": false,
1775
  "rstrip": false,
@@ -1777,7 +1777,7 @@
1777
  "special": true
1778
  },
1779
  "128222": {
1780
- "content": "<|reserved_special_token_217|>",
1781
  "lstrip": false,
1782
  "normalized": false,
1783
  "rstrip": false,
@@ -1785,7 +1785,7 @@
1785
  "special": true
1786
  },
1787
  "128223": {
1788
- "content": "<|reserved_special_token_218|>",
1789
  "lstrip": false,
1790
  "normalized": false,
1791
  "rstrip": false,
@@ -1793,7 +1793,7 @@
1793
  "special": true
1794
  },
1795
  "128224": {
1796
- "content": "<|reserved_special_token_219|>",
1797
  "lstrip": false,
1798
  "normalized": false,
1799
  "rstrip": false,
@@ -1801,7 +1801,7 @@
1801
  "special": true
1802
  },
1803
  "128225": {
1804
- "content": "<|reserved_special_token_220|>",
1805
  "lstrip": false,
1806
  "normalized": false,
1807
  "rstrip": false,
@@ -1809,7 +1809,7 @@
1809
  "special": true
1810
  },
1811
  "128226": {
1812
- "content": "<|reserved_special_token_221|>",
1813
  "lstrip": false,
1814
  "normalized": false,
1815
  "rstrip": false,
@@ -1817,7 +1817,7 @@
1817
  "special": true
1818
  },
1819
  "128227": {
1820
- "content": "<|reserved_special_token_222|>",
1821
  "lstrip": false,
1822
  "normalized": false,
1823
  "rstrip": false,
@@ -1825,7 +1825,7 @@
1825
  "special": true
1826
  },
1827
  "128228": {
1828
- "content": "<|reserved_special_token_223|>",
1829
  "lstrip": false,
1830
  "normalized": false,
1831
  "rstrip": false,
@@ -1833,7 +1833,7 @@
1833
  "special": true
1834
  },
1835
  "128229": {
1836
- "content": "<|reserved_special_token_224|>",
1837
  "lstrip": false,
1838
  "normalized": false,
1839
  "rstrip": false,
@@ -1841,7 +1841,7 @@
1841
  "special": true
1842
  },
1843
  "128230": {
1844
- "content": "<|reserved_special_token_225|>",
1845
  "lstrip": false,
1846
  "normalized": false,
1847
  "rstrip": false,
@@ -1849,7 +1849,7 @@
1849
  "special": true
1850
  },
1851
  "128231": {
1852
- "content": "<|reserved_special_token_226|>",
1853
  "lstrip": false,
1854
  "normalized": false,
1855
  "rstrip": false,
@@ -1857,7 +1857,7 @@
1857
  "special": true
1858
  },
1859
  "128232": {
1860
- "content": "<|reserved_special_token_227|>",
1861
  "lstrip": false,
1862
  "normalized": false,
1863
  "rstrip": false,
@@ -1865,7 +1865,7 @@
1865
  "special": true
1866
  },
1867
  "128233": {
1868
- "content": "<|reserved_special_token_228|>",
1869
  "lstrip": false,
1870
  "normalized": false,
1871
  "rstrip": false,
@@ -1873,7 +1873,7 @@
1873
  "special": true
1874
  },
1875
  "128234": {
1876
- "content": "<|reserved_special_token_229|>",
1877
  "lstrip": false,
1878
  "normalized": false,
1879
  "rstrip": false,
@@ -1881,7 +1881,7 @@
1881
  "special": true
1882
  },
1883
  "128235": {
1884
- "content": "<|reserved_special_token_230|>",
1885
  "lstrip": false,
1886
  "normalized": false,
1887
  "rstrip": false,
@@ -1889,7 +1889,7 @@
1889
  "special": true
1890
  },
1891
  "128236": {
1892
- "content": "<|reserved_special_token_231|>",
1893
  "lstrip": false,
1894
  "normalized": false,
1895
  "rstrip": false,
@@ -1897,7 +1897,7 @@
1897
  "special": true
1898
  },
1899
  "128237": {
1900
- "content": "<|reserved_special_token_232|>",
1901
  "lstrip": false,
1902
  "normalized": false,
1903
  "rstrip": false,
@@ -1905,7 +1905,7 @@
1905
  "special": true
1906
  },
1907
  "128238": {
1908
- "content": "<|reserved_special_token_233|>",
1909
  "lstrip": false,
1910
  "normalized": false,
1911
  "rstrip": false,
@@ -1913,7 +1913,7 @@
1913
  "special": true
1914
  },
1915
  "128239": {
1916
- "content": "<|reserved_special_token_234|>",
1917
  "lstrip": false,
1918
  "normalized": false,
1919
  "rstrip": false,
@@ -1921,7 +1921,7 @@
1921
  "special": true
1922
  },
1923
  "128240": {
1924
- "content": "<|reserved_special_token_235|>",
1925
  "lstrip": false,
1926
  "normalized": false,
1927
  "rstrip": false,
@@ -1929,7 +1929,7 @@
1929
  "special": true
1930
  },
1931
  "128241": {
1932
- "content": "<|reserved_special_token_236|>",
1933
  "lstrip": false,
1934
  "normalized": false,
1935
  "rstrip": false,
@@ -1937,7 +1937,7 @@
1937
  "special": true
1938
  },
1939
  "128242": {
1940
- "content": "<|reserved_special_token_237|>",
1941
  "lstrip": false,
1942
  "normalized": false,
1943
  "rstrip": false,
@@ -1945,7 +1945,7 @@
1945
  "special": true
1946
  },
1947
  "128243": {
1948
- "content": "<|reserved_special_token_238|>",
1949
  "lstrip": false,
1950
  "normalized": false,
1951
  "rstrip": false,
@@ -1953,7 +1953,7 @@
1953
  "special": true
1954
  },
1955
  "128244": {
1956
- "content": "<|reserved_special_token_239|>",
1957
  "lstrip": false,
1958
  "normalized": false,
1959
  "rstrip": false,
@@ -1961,7 +1961,7 @@
1961
  "special": true
1962
  },
1963
  "128245": {
1964
- "content": "<|reserved_special_token_240|>",
1965
  "lstrip": false,
1966
  "normalized": false,
1967
  "rstrip": false,
@@ -1969,7 +1969,7 @@
1969
  "special": true
1970
  },
1971
  "128246": {
1972
- "content": "<|reserved_special_token_241|>",
1973
  "lstrip": false,
1974
  "normalized": false,
1975
  "rstrip": false,
@@ -1977,7 +1977,7 @@
1977
  "special": true
1978
  },
1979
  "128247": {
1980
- "content": "<|reserved_special_token_242|>",
1981
  "lstrip": false,
1982
  "normalized": false,
1983
  "rstrip": false,
@@ -1985,7 +1985,7 @@
1985
  "special": true
1986
  },
1987
  "128248": {
1988
- "content": "<|reserved_special_token_243|>",
1989
  "lstrip": false,
1990
  "normalized": false,
1991
  "rstrip": false,
@@ -1993,7 +1993,7 @@
1993
  "special": true
1994
  },
1995
  "128249": {
1996
- "content": "<|reserved_special_token_244|>",
1997
  "lstrip": false,
1998
  "normalized": false,
1999
  "rstrip": false,
@@ -2001,7 +2001,7 @@
2001
  "special": true
2002
  },
2003
  "128250": {
2004
- "content": "<|reserved_special_token_245|>",
2005
  "lstrip": false,
2006
  "normalized": false,
2007
  "rstrip": false,
@@ -2009,7 +2009,7 @@
2009
  "special": true
2010
  },
2011
  "128251": {
2012
- "content": "<|reserved_special_token_246|>",
2013
  "lstrip": false,
2014
  "normalized": false,
2015
  "rstrip": false,
@@ -2017,7 +2017,7 @@
2017
  "special": true
2018
  },
2019
  "128252": {
2020
- "content": "<|reserved_special_token_247|>",
2021
  "lstrip": false,
2022
  "normalized": false,
2023
  "rstrip": false,
@@ -2025,7 +2025,7 @@
2025
  "special": true
2026
  },
2027
  "128253": {
2028
- "content": "<|reserved_special_token_248|>",
2029
  "lstrip": false,
2030
  "normalized": false,
2031
  "rstrip": false,
@@ -2033,7 +2033,7 @@
2033
  "special": true
2034
  },
2035
  "128254": {
2036
- "content": "<|reserved_special_token_249|>",
2037
  "lstrip": false,
2038
  "normalized": false,
2039
  "rstrip": false,
@@ -2041,7 +2041,7 @@
2041
  "special": true
2042
  },
2043
  "128255": {
2044
- "content": "<|reserved_special_token_250|>",
2045
  "lstrip": false,
2046
  "normalized": false,
2047
  "rstrip": false,
@@ -2050,7 +2050,7 @@
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
- "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
  "extra_special_tokens": {},
@@ -2058,7 +2058,7 @@
2058
  "input_ids",
2059
  "attention_mask"
2060
  ],
2061
- "model_max_length": 1000000000000000019884624838656,
2062
- "pad_token": "<|eot_id|>",
2063
  "tokenizer_class": "PreTrainedTokenizerFast"
2064
  }
 
33
  "special": true
34
  },
35
  "128004": {
36
+ "content": "<|finetune_right_pad_id|>",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
 
41
  "special": true
42
  },
43
  "128005": {
44
+ "content": "<|reserved_special_token_2|>",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
 
65
  "special": true
66
  },
67
  "128008": {
68
+ "content": "<|eom_id|>",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
 
81
  "special": true
82
  },
83
  "128010": {
84
+ "content": "<|python_tag|>",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
 
89
  "special": true
90
  },
91
  "128011": {
92
+ "content": "<|reserved_special_token_3|>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
 
97
  "special": true
98
  },
99
  "128012": {
100
+ "content": "<|reserved_special_token_4|>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
 
105
  "special": true
106
  },
107
  "128013": {
108
+ "content": "<|reserved_special_token_5|>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
 
113
  "special": true
114
  },
115
  "128014": {
116
+ "content": "<|reserved_special_token_6|>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
 
121
  "special": true
122
  },
123
  "128015": {
124
+ "content": "<|reserved_special_token_7|>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
 
129
  "special": true
130
  },
131
  "128016": {
132
+ "content": "<|reserved_special_token_8|>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
 
137
  "special": true
138
  },
139
  "128017": {
140
+ "content": "<|reserved_special_token_9|>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
 
145
  "special": true
146
  },
147
  "128018": {
148
+ "content": "<|reserved_special_token_10|>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
 
153
  "special": true
154
  },
155
  "128019": {
156
+ "content": "<|reserved_special_token_11|>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
 
161
  "special": true
162
  },
163
  "128020": {
164
+ "content": "<|reserved_special_token_12|>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
 
169
  "special": true
170
  },
171
  "128021": {
172
+ "content": "<|reserved_special_token_13|>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
 
177
  "special": true
178
  },
179
  "128022": {
180
+ "content": "<|reserved_special_token_14|>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
 
185
  "special": true
186
  },
187
  "128023": {
188
+ "content": "<|reserved_special_token_15|>",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
 
193
  "special": true
194
  },
195
  "128024": {
196
+ "content": "<|reserved_special_token_16|>",
197
  "lstrip": false,
198
  "normalized": false,
199
  "rstrip": false,
 
201
  "special": true
202
  },
203
  "128025": {
204
+ "content": "<|reserved_special_token_17|>",
205
  "lstrip": false,
206
  "normalized": false,
207
  "rstrip": false,
 
209
  "special": true
210
  },
211
  "128026": {
212
+ "content": "<|reserved_special_token_18|>",
213
  "lstrip": false,
214
  "normalized": false,
215
  "rstrip": false,
 
217
  "special": true
218
  },
219
  "128027": {
220
+ "content": "<|reserved_special_token_19|>",
221
  "lstrip": false,
222
  "normalized": false,
223
  "rstrip": false,
 
225
  "special": true
226
  },
227
  "128028": {
228
+ "content": "<|reserved_special_token_20|>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
 
233
  "special": true
234
  },
235
  "128029": {
236
+ "content": "<|reserved_special_token_21|>",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
 
241
  "special": true
242
  },
243
  "128030": {
244
+ "content": "<|reserved_special_token_22|>",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
 
249
  "special": true
250
  },
251
  "128031": {
252
+ "content": "<|reserved_special_token_23|>",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
 
257
  "special": true
258
  },
259
  "128032": {
260
+ "content": "<|reserved_special_token_24|>",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
 
265
  "special": true
266
  },
267
  "128033": {
268
+ "content": "<|reserved_special_token_25|>",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
 
273
  "special": true
274
  },
275
  "128034": {
276
+ "content": "<|reserved_special_token_26|>",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
 
281
  "special": true
282
  },
283
  "128035": {
284
+ "content": "<|reserved_special_token_27|>",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
 
289
  "special": true
290
  },
291
  "128036": {
292
+ "content": "<|reserved_special_token_28|>",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
 
297
  "special": true
298
  },
299
  "128037": {
300
+ "content": "<|reserved_special_token_29|>",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
 
305
  "special": true
306
  },
307
  "128038": {
308
+ "content": "<|reserved_special_token_30|>",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
 
313
  "special": true
314
  },
315
  "128039": {
316
+ "content": "<|reserved_special_token_31|>",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
 
321
  "special": true
322
  },
323
  "128040": {
324
+ "content": "<|reserved_special_token_32|>",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
 
329
  "special": true
330
  },
331
  "128041": {
332
+ "content": "<|reserved_special_token_33|>",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
 
337
  "special": true
338
  },
339
  "128042": {
340
+ "content": "<|reserved_special_token_34|>",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
 
345
  "special": true
346
  },
347
  "128043": {
348
+ "content": "<|reserved_special_token_35|>",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
 
353
  "special": true
354
  },
355
  "128044": {
356
+ "content": "<|reserved_special_token_36|>",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
 
361
  "special": true
362
  },
363
  "128045": {
364
+ "content": "<|reserved_special_token_37|>",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
 
369
  "special": true
370
  },
371
  "128046": {
372
+ "content": "<|reserved_special_token_38|>",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
 
377
  "special": true
378
  },
379
  "128047": {
380
+ "content": "<|reserved_special_token_39|>",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
 
385
  "special": true
386
  },
387
  "128048": {
388
+ "content": "<|reserved_special_token_40|>",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
 
393
  "special": true
394
  },
395
  "128049": {
396
+ "content": "<|reserved_special_token_41|>",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
 
401
  "special": true
402
  },
403
  "128050": {
404
+ "content": "<|reserved_special_token_42|>",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
 
409
  "special": true
410
  },
411
  "128051": {
412
+ "content": "<|reserved_special_token_43|>",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
 
417
  "special": true
418
  },
419
  "128052": {
420
+ "content": "<|reserved_special_token_44|>",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
 
425
  "special": true
426
  },
427
  "128053": {
428
+ "content": "<|reserved_special_token_45|>",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
 
433
  "special": true
434
  },
435
  "128054": {
436
+ "content": "<|reserved_special_token_46|>",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
 
441
  "special": true
442
  },
443
  "128055": {
444
+ "content": "<|reserved_special_token_47|>",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
 
449
  "special": true
450
  },
451
  "128056": {
452
+ "content": "<|reserved_special_token_48|>",
453
  "lstrip": false,
454
  "normalized": false,
455
  "rstrip": false,
 
457
  "special": true
458
  },
459
  "128057": {
460
+ "content": "<|reserved_special_token_49|>",
461
  "lstrip": false,
462
  "normalized": false,
463
  "rstrip": false,
 
465
  "special": true
466
  },
467
  "128058": {
468
+ "content": "<|reserved_special_token_50|>",
469
  "lstrip": false,
470
  "normalized": false,
471
  "rstrip": false,
 
473
  "special": true
474
  },
475
  "128059": {
476
+ "content": "<|reserved_special_token_51|>",
477
  "lstrip": false,
478
  "normalized": false,
479
  "rstrip": false,
 
481
  "special": true
482
  },
483
  "128060": {
484
+ "content": "<|reserved_special_token_52|>",
485
  "lstrip": false,
486
  "normalized": false,
487
  "rstrip": false,
 
489
  "special": true
490
  },
491
  "128061": {
492
+ "content": "<|reserved_special_token_53|>",
493
  "lstrip": false,
494
  "normalized": false,
495
  "rstrip": false,
 
497
  "special": true
498
  },
499
  "128062": {
500
+ "content": "<|reserved_special_token_54|>",
501
  "lstrip": false,
502
  "normalized": false,
503
  "rstrip": false,
 
505
  "special": true
506
  },
507
  "128063": {
508
+ "content": "<|reserved_special_token_55|>",
509
  "lstrip": false,
510
  "normalized": false,
511
  "rstrip": false,
 
513
  "special": true
514
  },
515
  "128064": {
516
+ "content": "<|reserved_special_token_56|>",
517
  "lstrip": false,
518
  "normalized": false,
519
  "rstrip": false,
 
521
  "special": true
522
  },
523
  "128065": {
524
+ "content": "<|reserved_special_token_57|>",
525
  "lstrip": false,
526
  "normalized": false,
527
  "rstrip": false,
 
529
  "special": true
530
  },
531
  "128066": {
532
+ "content": "<|reserved_special_token_58|>",
533
  "lstrip": false,
534
  "normalized": false,
535
  "rstrip": false,
 
537
  "special": true
538
  },
539
  "128067": {
540
+ "content": "<|reserved_special_token_59|>",
541
  "lstrip": false,
542
  "normalized": false,
543
  "rstrip": false,
 
545
  "special": true
546
  },
547
  "128068": {
548
+ "content": "<|reserved_special_token_60|>",
549
  "lstrip": false,
550
  "normalized": false,
551
  "rstrip": false,
 
553
  "special": true
554
  },
555
  "128069": {
556
+ "content": "<|reserved_special_token_61|>",
557
  "lstrip": false,
558
  "normalized": false,
559
  "rstrip": false,
 
561
  "special": true
562
  },
563
  "128070": {
564
+ "content": "<|reserved_special_token_62|>",
565
  "lstrip": false,
566
  "normalized": false,
567
  "rstrip": false,
 
569
  "special": true
570
  },
571
  "128071": {
572
+ "content": "<|reserved_special_token_63|>",
573
  "lstrip": false,
574
  "normalized": false,
575
  "rstrip": false,
 
577
  "special": true
578
  },
579
  "128072": {
580
+ "content": "<|reserved_special_token_64|>",
581
  "lstrip": false,
582
  "normalized": false,
583
  "rstrip": false,
 
585
  "special": true
586
  },
587
  "128073": {
588
+ "content": "<|reserved_special_token_65|>",
589
  "lstrip": false,
590
  "normalized": false,
591
  "rstrip": false,
 
593
  "special": true
594
  },
595
  "128074": {
596
+ "content": "<|reserved_special_token_66|>",
597
  "lstrip": false,
598
  "normalized": false,
599
  "rstrip": false,
 
601
  "special": true
602
  },
603
  "128075": {
604
+ "content": "<|reserved_special_token_67|>",
605
  "lstrip": false,
606
  "normalized": false,
607
  "rstrip": false,
 
609
  "special": true
610
  },
611
  "128076": {
612
+ "content": "<|reserved_special_token_68|>",
613
  "lstrip": false,
614
  "normalized": false,
615
  "rstrip": false,
 
617
  "special": true
618
  },
619
  "128077": {
620
+ "content": "<|reserved_special_token_69|>",
621
  "lstrip": false,
622
  "normalized": false,
623
  "rstrip": false,
 
625
  "special": true
626
  },
627
  "128078": {
628
+ "content": "<|reserved_special_token_70|>",
629
  "lstrip": false,
630
  "normalized": false,
631
  "rstrip": false,
 
633
  "special": true
634
  },
635
  "128079": {
636
+ "content": "<|reserved_special_token_71|>",
637
  "lstrip": false,
638
  "normalized": false,
639
  "rstrip": false,
 
641
  "special": true
642
  },
643
  "128080": {
644
+ "content": "<|reserved_special_token_72|>",
645
  "lstrip": false,
646
  "normalized": false,
647
  "rstrip": false,
 
649
  "special": true
650
  },
651
  "128081": {
652
+ "content": "<|reserved_special_token_73|>",
653
  "lstrip": false,
654
  "normalized": false,
655
  "rstrip": false,
 
657
  "special": true
658
  },
659
  "128082": {
660
+ "content": "<|reserved_special_token_74|>",
661
  "lstrip": false,
662
  "normalized": false,
663
  "rstrip": false,
 
665
  "special": true
666
  },
667
  "128083": {
668
+ "content": "<|reserved_special_token_75|>",
669
  "lstrip": false,
670
  "normalized": false,
671
  "rstrip": false,
 
673
  "special": true
674
  },
675
  "128084": {
676
+ "content": "<|reserved_special_token_76|>",
677
  "lstrip": false,
678
  "normalized": false,
679
  "rstrip": false,
 
681
  "special": true
682
  },
683
  "128085": {
684
+ "content": "<|reserved_special_token_77|>",
685
  "lstrip": false,
686
  "normalized": false,
687
  "rstrip": false,
 
689
  "special": true
690
  },
691
  "128086": {
692
+ "content": "<|reserved_special_token_78|>",
693
  "lstrip": false,
694
  "normalized": false,
695
  "rstrip": false,
 
697
  "special": true
698
  },
699
  "128087": {
700
+ "content": "<|reserved_special_token_79|>",
701
  "lstrip": false,
702
  "normalized": false,
703
  "rstrip": false,
 
705
  "special": true
706
  },
707
  "128088": {
708
+ "content": "<|reserved_special_token_80|>",
709
  "lstrip": false,
710
  "normalized": false,
711
  "rstrip": false,
 
713
  "special": true
714
  },
715
  "128089": {
716
+ "content": "<|reserved_special_token_81|>",
717
  "lstrip": false,
718
  "normalized": false,
719
  "rstrip": false,
 
721
  "special": true
722
  },
723
  "128090": {
724
+ "content": "<|reserved_special_token_82|>",
725
  "lstrip": false,
726
  "normalized": false,
727
  "rstrip": false,
 
729
  "special": true
730
  },
731
  "128091": {
732
+ "content": "<|reserved_special_token_83|>",
733
  "lstrip": false,
734
  "normalized": false,
735
  "rstrip": false,
 
737
  "special": true
738
  },
739
  "128092": {
740
+ "content": "<|reserved_special_token_84|>",
741
  "lstrip": false,
742
  "normalized": false,
743
  "rstrip": false,
 
745
  "special": true
746
  },
747
  "128093": {
748
+ "content": "<|reserved_special_token_85|>",
749
  "lstrip": false,
750
  "normalized": false,
751
  "rstrip": false,
 
753
  "special": true
754
  },
755
  "128094": {
756
+ "content": "<|reserved_special_token_86|>",
757
  "lstrip": false,
758
  "normalized": false,
759
  "rstrip": false,
 
761
  "special": true
762
  },
763
  "128095": {
764
+ "content": "<|reserved_special_token_87|>",
765
  "lstrip": false,
766
  "normalized": false,
767
  "rstrip": false,
 
769
  "special": true
770
  },
771
  "128096": {
772
+ "content": "<|reserved_special_token_88|>",
773
  "lstrip": false,
774
  "normalized": false,
775
  "rstrip": false,
 
777
  "special": true
778
  },
779
  "128097": {
780
+ "content": "<|reserved_special_token_89|>",
781
  "lstrip": false,
782
  "normalized": false,
783
  "rstrip": false,
 
785
  "special": true
786
  },
787
  "128098": {
788
+ "content": "<|reserved_special_token_90|>",
789
  "lstrip": false,
790
  "normalized": false,
791
  "rstrip": false,
 
793
  "special": true
794
  },
795
  "128099": {
796
+ "content": "<|reserved_special_token_91|>",
797
  "lstrip": false,
798
  "normalized": false,
799
  "rstrip": false,
 
801
  "special": true
802
  },
803
  "128100": {
804
+ "content": "<|reserved_special_token_92|>",
805
  "lstrip": false,
806
  "normalized": false,
807
  "rstrip": false,
 
809
  "special": true
810
  },
811
  "128101": {
812
+ "content": "<|reserved_special_token_93|>",
813
  "lstrip": false,
814
  "normalized": false,
815
  "rstrip": false,
 
817
  "special": true
818
  },
819
  "128102": {
820
+ "content": "<|reserved_special_token_94|>",
821
  "lstrip": false,
822
  "normalized": false,
823
  "rstrip": false,
 
825
  "special": true
826
  },
827
  "128103": {
828
+ "content": "<|reserved_special_token_95|>",
829
  "lstrip": false,
830
  "normalized": false,
831
  "rstrip": false,
 
833
  "special": true
834
  },
835
  "128104": {
836
+ "content": "<|reserved_special_token_96|>",
837
  "lstrip": false,
838
  "normalized": false,
839
  "rstrip": false,
 
841
  "special": true
842
  },
843
  "128105": {
844
+ "content": "<|reserved_special_token_97|>",
845
  "lstrip": false,
846
  "normalized": false,
847
  "rstrip": false,
 
849
  "special": true
850
  },
851
  "128106": {
852
+ "content": "<|reserved_special_token_98|>",
853
  "lstrip": false,
854
  "normalized": false,
855
  "rstrip": false,
 
857
  "special": true
858
  },
859
  "128107": {
860
+ "content": "<|reserved_special_token_99|>",
861
  "lstrip": false,
862
  "normalized": false,
863
  "rstrip": false,
 
865
  "special": true
866
  },
867
  "128108": {
868
+ "content": "<|reserved_special_token_100|>",
869
  "lstrip": false,
870
  "normalized": false,
871
  "rstrip": false,
 
873
  "special": true
874
  },
875
  "128109": {
876
+ "content": "<|reserved_special_token_101|>",
877
  "lstrip": false,
878
  "normalized": false,
879
  "rstrip": false,
 
881
  "special": true
882
  },
883
  "128110": {
884
+ "content": "<|reserved_special_token_102|>",
885
  "lstrip": false,
886
  "normalized": false,
887
  "rstrip": false,
 
889
  "special": true
890
  },
891
  "128111": {
892
+ "content": "<|reserved_special_token_103|>",
893
  "lstrip": false,
894
  "normalized": false,
895
  "rstrip": false,
 
897
  "special": true
898
  },
899
  "128112": {
900
+ "content": "<|reserved_special_token_104|>",
901
  "lstrip": false,
902
  "normalized": false,
903
  "rstrip": false,
 
905
  "special": true
906
  },
907
  "128113": {
908
+ "content": "<|reserved_special_token_105|>",
909
  "lstrip": false,
910
  "normalized": false,
911
  "rstrip": false,
 
913
  "special": true
914
  },
915
  "128114": {
916
+ "content": "<|reserved_special_token_106|>",
917
  "lstrip": false,
918
  "normalized": false,
919
  "rstrip": false,
 
921
  "special": true
922
  },
923
  "128115": {
924
+ "content": "<|reserved_special_token_107|>",
925
  "lstrip": false,
926
  "normalized": false,
927
  "rstrip": false,
 
929
  "special": true
930
  },
931
  "128116": {
932
+ "content": "<|reserved_special_token_108|>",
933
  "lstrip": false,
934
  "normalized": false,
935
  "rstrip": false,
 
937
  "special": true
938
  },
939
  "128117": {
940
+ "content": "<|reserved_special_token_109|>",
941
  "lstrip": false,
942
  "normalized": false,
943
  "rstrip": false,
 
945
  "special": true
946
  },
947
  "128118": {
948
+ "content": "<|reserved_special_token_110|>",
949
  "lstrip": false,
950
  "normalized": false,
951
  "rstrip": false,
 
953
  "special": true
954
  },
955
  "128119": {
956
+ "content": "<|reserved_special_token_111|>",
957
  "lstrip": false,
958
  "normalized": false,
959
  "rstrip": false,
 
961
  "special": true
962
  },
963
  "128120": {
964
+ "content": "<|reserved_special_token_112|>",
965
  "lstrip": false,
966
  "normalized": false,
967
  "rstrip": false,
 
969
  "special": true
970
  },
971
  "128121": {
972
+ "content": "<|reserved_special_token_113|>",
973
  "lstrip": false,
974
  "normalized": false,
975
  "rstrip": false,
 
977
  "special": true
978
  },
979
  "128122": {
980
+ "content": "<|reserved_special_token_114|>",
981
  "lstrip": false,
982
  "normalized": false,
983
  "rstrip": false,
 
985
  "special": true
986
  },
987
  "128123": {
988
+ "content": "<|reserved_special_token_115|>",
989
  "lstrip": false,
990
  "normalized": false,
991
  "rstrip": false,
 
993
  "special": true
994
  },
995
  "128124": {
996
+ "content": "<|reserved_special_token_116|>",
997
  "lstrip": false,
998
  "normalized": false,
999
  "rstrip": false,
 
1001
  "special": true
1002
  },
1003
  "128125": {
1004
+ "content": "<|reserved_special_token_117|>",
1005
  "lstrip": false,
1006
  "normalized": false,
1007
  "rstrip": false,
 
1009
  "special": true
1010
  },
1011
  "128126": {
1012
+ "content": "<|reserved_special_token_118|>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
 
1017
  "special": true
1018
  },
1019
  "128127": {
1020
+ "content": "<|reserved_special_token_119|>",
1021
  "lstrip": false,
1022
  "normalized": false,
1023
  "rstrip": false,
 
1025
  "special": true
1026
  },
1027
  "128128": {
1028
+ "content": "<|reserved_special_token_120|>",
1029
  "lstrip": false,
1030
  "normalized": false,
1031
  "rstrip": false,
 
1033
  "special": true
1034
  },
1035
  "128129": {
1036
+ "content": "<|reserved_special_token_121|>",
1037
  "lstrip": false,
1038
  "normalized": false,
1039
  "rstrip": false,
 
1041
  "special": true
1042
  },
1043
  "128130": {
1044
+ "content": "<|reserved_special_token_122|>",
1045
  "lstrip": false,
1046
  "normalized": false,
1047
  "rstrip": false,
 
1049
  "special": true
1050
  },
1051
  "128131": {
1052
+ "content": "<|reserved_special_token_123|>",
1053
  "lstrip": false,
1054
  "normalized": false,
1055
  "rstrip": false,
 
1057
  "special": true
1058
  },
1059
  "128132": {
1060
+ "content": "<|reserved_special_token_124|>",
1061
  "lstrip": false,
1062
  "normalized": false,
1063
  "rstrip": false,
 
1065
  "special": true
1066
  },
1067
  "128133": {
1068
+ "content": "<|reserved_special_token_125|>",
1069
  "lstrip": false,
1070
  "normalized": false,
1071
  "rstrip": false,
 
1073
  "special": true
1074
  },
1075
  "128134": {
1076
+ "content": "<|reserved_special_token_126|>",
1077
  "lstrip": false,
1078
  "normalized": false,
1079
  "rstrip": false,
 
1081
  "special": true
1082
  },
1083
  "128135": {
1084
+ "content": "<|reserved_special_token_127|>",
1085
  "lstrip": false,
1086
  "normalized": false,
1087
  "rstrip": false,
 
1089
  "special": true
1090
  },
1091
  "128136": {
1092
+ "content": "<|reserved_special_token_128|>",
1093
  "lstrip": false,
1094
  "normalized": false,
1095
  "rstrip": false,
 
1097
  "special": true
1098
  },
1099
  "128137": {
1100
+ "content": "<|reserved_special_token_129|>",
1101
  "lstrip": false,
1102
  "normalized": false,
1103
  "rstrip": false,
 
1105
  "special": true
1106
  },
1107
  "128138": {
1108
+ "content": "<|reserved_special_token_130|>",
1109
  "lstrip": false,
1110
  "normalized": false,
1111
  "rstrip": false,
 
1113
  "special": true
1114
  },
1115
  "128139": {
1116
+ "content": "<|reserved_special_token_131|>",
1117
  "lstrip": false,
1118
  "normalized": false,
1119
  "rstrip": false,
 
1121
  "special": true
1122
  },
1123
  "128140": {
1124
+ "content": "<|reserved_special_token_132|>",
1125
  "lstrip": false,
1126
  "normalized": false,
1127
  "rstrip": false,
 
1129
  "special": true
1130
  },
1131
  "128141": {
1132
+ "content": "<|reserved_special_token_133|>",
1133
  "lstrip": false,
1134
  "normalized": false,
1135
  "rstrip": false,
 
1137
  "special": true
1138
  },
1139
  "128142": {
1140
+ "content": "<|reserved_special_token_134|>",
1141
  "lstrip": false,
1142
  "normalized": false,
1143
  "rstrip": false,
 
1145
  "special": true
1146
  },
1147
  "128143": {
1148
+ "content": "<|reserved_special_token_135|>",
1149
  "lstrip": false,
1150
  "normalized": false,
1151
  "rstrip": false,
 
1153
  "special": true
1154
  },
1155
  "128144": {
1156
+ "content": "<|reserved_special_token_136|>",
1157
  "lstrip": false,
1158
  "normalized": false,
1159
  "rstrip": false,
 
1161
  "special": true
1162
  },
1163
  "128145": {
1164
+ "content": "<|reserved_special_token_137|>",
1165
  "lstrip": false,
1166
  "normalized": false,
1167
  "rstrip": false,
 
1169
  "special": true
1170
  },
1171
  "128146": {
1172
+ "content": "<|reserved_special_token_138|>",
1173
  "lstrip": false,
1174
  "normalized": false,
1175
  "rstrip": false,
 
1177
  "special": true
1178
  },
1179
  "128147": {
1180
+ "content": "<|reserved_special_token_139|>",
1181
  "lstrip": false,
1182
  "normalized": false,
1183
  "rstrip": false,
 
1185
  "special": true
1186
  },
1187
  "128148": {
1188
+ "content": "<|reserved_special_token_140|>",
1189
  "lstrip": false,
1190
  "normalized": false,
1191
  "rstrip": false,
 
1193
  "special": true
1194
  },
1195
  "128149": {
1196
+ "content": "<|reserved_special_token_141|>",
1197
  "lstrip": false,
1198
  "normalized": false,
1199
  "rstrip": false,
 
1201
  "special": true
1202
  },
1203
  "128150": {
1204
+ "content": "<|reserved_special_token_142|>",
1205
  "lstrip": false,
1206
  "normalized": false,
1207
  "rstrip": false,
 
1209
  "special": true
1210
  },
1211
  "128151": {
1212
+ "content": "<|reserved_special_token_143|>",
1213
  "lstrip": false,
1214
  "normalized": false,
1215
  "rstrip": false,
 
1217
  "special": true
1218
  },
1219
  "128152": {
1220
+ "content": "<|reserved_special_token_144|>",
1221
  "lstrip": false,
1222
  "normalized": false,
1223
  "rstrip": false,
 
1225
  "special": true
1226
  },
1227
  "128153": {
1228
+ "content": "<|reserved_special_token_145|>",
1229
  "lstrip": false,
1230
  "normalized": false,
1231
  "rstrip": false,
 
1233
  "special": true
1234
  },
1235
  "128154": {
1236
+ "content": "<|reserved_special_token_146|>",
1237
  "lstrip": false,
1238
  "normalized": false,
1239
  "rstrip": false,
 
1241
  "special": true
1242
  },
1243
  "128155": {
1244
+ "content": "<|reserved_special_token_147|>",
1245
  "lstrip": false,
1246
  "normalized": false,
1247
  "rstrip": false,
 
1249
  "special": true
1250
  },
1251
  "128156": {
1252
+ "content": "<|reserved_special_token_148|>",
1253
  "lstrip": false,
1254
  "normalized": false,
1255
  "rstrip": false,
 
1257
  "special": true
1258
  },
1259
  "128157": {
1260
+ "content": "<|reserved_special_token_149|>",
1261
  "lstrip": false,
1262
  "normalized": false,
1263
  "rstrip": false,
 
1265
  "special": true
1266
  },
1267
  "128158": {
1268
+ "content": "<|reserved_special_token_150|>",
1269
  "lstrip": false,
1270
  "normalized": false,
1271
  "rstrip": false,
 
1273
  "special": true
1274
  },
1275
  "128159": {
1276
+ "content": "<|reserved_special_token_151|>",
1277
  "lstrip": false,
1278
  "normalized": false,
1279
  "rstrip": false,
 
1281
  "special": true
1282
  },
1283
  "128160": {
1284
+ "content": "<|reserved_special_token_152|>",
1285
  "lstrip": false,
1286
  "normalized": false,
1287
  "rstrip": false,
 
1289
  "special": true
1290
  },
1291
  "128161": {
1292
+ "content": "<|reserved_special_token_153|>",
1293
  "lstrip": false,
1294
  "normalized": false,
1295
  "rstrip": false,
 
1297
  "special": true
1298
  },
1299
  "128162": {
1300
+ "content": "<|reserved_special_token_154|>",
1301
  "lstrip": false,
1302
  "normalized": false,
1303
  "rstrip": false,
 
1305
  "special": true
1306
  },
1307
  "128163": {
1308
+ "content": "<|reserved_special_token_155|>",
1309
  "lstrip": false,
1310
  "normalized": false,
1311
  "rstrip": false,
 
1313
  "special": true
1314
  },
1315
  "128164": {
1316
+ "content": "<|reserved_special_token_156|>",
1317
  "lstrip": false,
1318
  "normalized": false,
1319
  "rstrip": false,
 
1321
  "special": true
1322
  },
1323
  "128165": {
1324
+ "content": "<|reserved_special_token_157|>",
1325
  "lstrip": false,
1326
  "normalized": false,
1327
  "rstrip": false,
 
1329
  "special": true
1330
  },
1331
  "128166": {
1332
+ "content": "<|reserved_special_token_158|>",
1333
  "lstrip": false,
1334
  "normalized": false,
1335
  "rstrip": false,
 
1337
  "special": true
1338
  },
1339
  "128167": {
1340
+ "content": "<|reserved_special_token_159|>",
1341
  "lstrip": false,
1342
  "normalized": false,
1343
  "rstrip": false,
 
1345
  "special": true
1346
  },
1347
  "128168": {
1348
+ "content": "<|reserved_special_token_160|>",
1349
  "lstrip": false,
1350
  "normalized": false,
1351
  "rstrip": false,
 
1353
  "special": true
1354
  },
1355
  "128169": {
1356
+ "content": "<|reserved_special_token_161|>",
1357
  "lstrip": false,
1358
  "normalized": false,
1359
  "rstrip": false,
 
1361
  "special": true
1362
  },
1363
  "128170": {
1364
+ "content": "<|reserved_special_token_162|>",
1365
  "lstrip": false,
1366
  "normalized": false,
1367
  "rstrip": false,
 
1369
  "special": true
1370
  },
1371
  "128171": {
1372
+ "content": "<|reserved_special_token_163|>",
1373
  "lstrip": false,
1374
  "normalized": false,
1375
  "rstrip": false,
 
1377
  "special": true
1378
  },
1379
  "128172": {
1380
+ "content": "<|reserved_special_token_164|>",
1381
  "lstrip": false,
1382
  "normalized": false,
1383
  "rstrip": false,
 
1385
  "special": true
1386
  },
1387
  "128173": {
1388
+ "content": "<|reserved_special_token_165|>",
1389
  "lstrip": false,
1390
  "normalized": false,
1391
  "rstrip": false,
 
1393
  "special": true
1394
  },
1395
  "128174": {
1396
+ "content": "<|reserved_special_token_166|>",
1397
  "lstrip": false,
1398
  "normalized": false,
1399
  "rstrip": false,
 
1401
  "special": true
1402
  },
1403
  "128175": {
1404
+ "content": "<|reserved_special_token_167|>",
1405
  "lstrip": false,
1406
  "normalized": false,
1407
  "rstrip": false,
 
1409
  "special": true
1410
  },
1411
  "128176": {
1412
+ "content": "<|reserved_special_token_168|>",
1413
  "lstrip": false,
1414
  "normalized": false,
1415
  "rstrip": false,
 
1417
  "special": true
1418
  },
1419
  "128177": {
1420
+ "content": "<|reserved_special_token_169|>",
1421
  "lstrip": false,
1422
  "normalized": false,
1423
  "rstrip": false,
 
1425
  "special": true
1426
  },
1427
  "128178": {
1428
+ "content": "<|reserved_special_token_170|>",
1429
  "lstrip": false,
1430
  "normalized": false,
1431
  "rstrip": false,
 
1433
  "special": true
1434
  },
1435
  "128179": {
1436
+ "content": "<|reserved_special_token_171|>",
1437
  "lstrip": false,
1438
  "normalized": false,
1439
  "rstrip": false,
 
1441
  "special": true
1442
  },
1443
  "128180": {
1444
+ "content": "<|reserved_special_token_172|>",
1445
  "lstrip": false,
1446
  "normalized": false,
1447
  "rstrip": false,
 
1449
  "special": true
1450
  },
1451
  "128181": {
1452
+ "content": "<|reserved_special_token_173|>",
1453
  "lstrip": false,
1454
  "normalized": false,
1455
  "rstrip": false,
 
1457
  "special": true
1458
  },
1459
  "128182": {
1460
+ "content": "<|reserved_special_token_174|>",
1461
  "lstrip": false,
1462
  "normalized": false,
1463
  "rstrip": false,
 
1465
  "special": true
1466
  },
1467
  "128183": {
1468
+ "content": "<|reserved_special_token_175|>",
1469
  "lstrip": false,
1470
  "normalized": false,
1471
  "rstrip": false,
 
1473
  "special": true
1474
  },
1475
  "128184": {
1476
+ "content": "<|reserved_special_token_176|>",
1477
  "lstrip": false,
1478
  "normalized": false,
1479
  "rstrip": false,
 
1481
  "special": true
1482
  },
1483
  "128185": {
1484
+ "content": "<|reserved_special_token_177|>",
1485
  "lstrip": false,
1486
  "normalized": false,
1487
  "rstrip": false,
 
1489
  "special": true
1490
  },
1491
  "128186": {
1492
+ "content": "<|reserved_special_token_178|>",
1493
  "lstrip": false,
1494
  "normalized": false,
1495
  "rstrip": false,
 
1497
  "special": true
1498
  },
1499
  "128187": {
1500
+ "content": "<|reserved_special_token_179|>",
1501
  "lstrip": false,
1502
  "normalized": false,
1503
  "rstrip": false,
 
1505
  "special": true
1506
  },
1507
  "128188": {
1508
+ "content": "<|reserved_special_token_180|>",
1509
  "lstrip": false,
1510
  "normalized": false,
1511
  "rstrip": false,
 
1513
  "special": true
1514
  },
1515
  "128189": {
1516
+ "content": "<|reserved_special_token_181|>",
1517
  "lstrip": false,
1518
  "normalized": false,
1519
  "rstrip": false,
 
1521
  "special": true
1522
  },
1523
  "128190": {
1524
+ "content": "<|reserved_special_token_182|>",
1525
  "lstrip": false,
1526
  "normalized": false,
1527
  "rstrip": false,
 
1529
  "special": true
1530
  },
1531
  "128191": {
1532
+ "content": "<|reserved_special_token_183|>",
1533
  "lstrip": false,
1534
  "normalized": false,
1535
  "rstrip": false,
 
1537
  "special": true
1538
  },
1539
  "128192": {
1540
+ "content": "<|reserved_special_token_184|>",
1541
  "lstrip": false,
1542
  "normalized": false,
1543
  "rstrip": false,
 
1545
  "special": true
1546
  },
1547
  "128193": {
1548
+ "content": "<|reserved_special_token_185|>",
1549
  "lstrip": false,
1550
  "normalized": false,
1551
  "rstrip": false,
 
1553
  "special": true
1554
  },
1555
  "128194": {
1556
+ "content": "<|reserved_special_token_186|>",
1557
  "lstrip": false,
1558
  "normalized": false,
1559
  "rstrip": false,
 
1561
  "special": true
1562
  },
1563
  "128195": {
1564
+ "content": "<|reserved_special_token_187|>",
1565
  "lstrip": false,
1566
  "normalized": false,
1567
  "rstrip": false,
 
1569
  "special": true
1570
  },
1571
  "128196": {
1572
+ "content": "<|reserved_special_token_188|>",
1573
  "lstrip": false,
1574
  "normalized": false,
1575
  "rstrip": false,
 
1577
  "special": true
1578
  },
1579
  "128197": {
1580
+ "content": "<|reserved_special_token_189|>",
1581
  "lstrip": false,
1582
  "normalized": false,
1583
  "rstrip": false,
 
1585
  "special": true
1586
  },
1587
  "128198": {
1588
+ "content": "<|reserved_special_token_190|>",
1589
  "lstrip": false,
1590
  "normalized": false,
1591
  "rstrip": false,
 
1593
  "special": true
1594
  },
1595
  "128199": {
1596
+ "content": "<|reserved_special_token_191|>",
1597
  "lstrip": false,
1598
  "normalized": false,
1599
  "rstrip": false,
 
1601
  "special": true
1602
  },
1603
  "128200": {
1604
+ "content": "<|reserved_special_token_192|>",
1605
  "lstrip": false,
1606
  "normalized": false,
1607
  "rstrip": false,
 
1609
  "special": true
1610
  },
1611
  "128201": {
1612
+ "content": "<|reserved_special_token_193|>",
1613
  "lstrip": false,
1614
  "normalized": false,
1615
  "rstrip": false,
 
1617
  "special": true
1618
  },
1619
  "128202": {
1620
+ "content": "<|reserved_special_token_194|>",
1621
  "lstrip": false,
1622
  "normalized": false,
1623
  "rstrip": false,
 
1625
  "special": true
1626
  },
1627
  "128203": {
1628
+ "content": "<|reserved_special_token_195|>",
1629
  "lstrip": false,
1630
  "normalized": false,
1631
  "rstrip": false,
 
1633
  "special": true
1634
  },
1635
  "128204": {
1636
+ "content": "<|reserved_special_token_196|>",
1637
  "lstrip": false,
1638
  "normalized": false,
1639
  "rstrip": false,
 
1641
  "special": true
1642
  },
1643
  "128205": {
1644
+ "content": "<|reserved_special_token_197|>",
1645
  "lstrip": false,
1646
  "normalized": false,
1647
  "rstrip": false,
 
1649
  "special": true
1650
  },
1651
  "128206": {
1652
+ "content": "<|reserved_special_token_198|>",
1653
  "lstrip": false,
1654
  "normalized": false,
1655
  "rstrip": false,
 
1657
  "special": true
1658
  },
1659
  "128207": {
1660
+ "content": "<|reserved_special_token_199|>",
1661
  "lstrip": false,
1662
  "normalized": false,
1663
  "rstrip": false,
 
1665
  "special": true
1666
  },
1667
  "128208": {
1668
+ "content": "<|reserved_special_token_200|>",
1669
  "lstrip": false,
1670
  "normalized": false,
1671
  "rstrip": false,
 
1673
  "special": true
1674
  },
1675
  "128209": {
1676
+ "content": "<|reserved_special_token_201|>",
1677
  "lstrip": false,
1678
  "normalized": false,
1679
  "rstrip": false,
 
1681
  "special": true
1682
  },
1683
  "128210": {
1684
+ "content": "<|reserved_special_token_202|>",
1685
  "lstrip": false,
1686
  "normalized": false,
1687
  "rstrip": false,
 
1689
  "special": true
1690
  },
1691
  "128211": {
1692
+ "content": "<|reserved_special_token_203|>",
1693
  "lstrip": false,
1694
  "normalized": false,
1695
  "rstrip": false,
 
1697
  "special": true
1698
  },
1699
  "128212": {
1700
+ "content": "<|reserved_special_token_204|>",
1701
  "lstrip": false,
1702
  "normalized": false,
1703
  "rstrip": false,
 
1705
  "special": true
1706
  },
1707
  "128213": {
1708
+ "content": "<|reserved_special_token_205|>",
1709
  "lstrip": false,
1710
  "normalized": false,
1711
  "rstrip": false,
 
1713
  "special": true
1714
  },
1715
  "128214": {
1716
+ "content": "<|reserved_special_token_206|>",
1717
  "lstrip": false,
1718
  "normalized": false,
1719
  "rstrip": false,
 
1721
  "special": true
1722
  },
1723
  "128215": {
1724
+ "content": "<|reserved_special_token_207|>",
1725
  "lstrip": false,
1726
  "normalized": false,
1727
  "rstrip": false,
 
1729
  "special": true
1730
  },
1731
  "128216": {
1732
+ "content": "<|reserved_special_token_208|>",
1733
  "lstrip": false,
1734
  "normalized": false,
1735
  "rstrip": false,
 
1737
  "special": true
1738
  },
1739
  "128217": {
1740
+ "content": "<|reserved_special_token_209|>",
1741
  "lstrip": false,
1742
  "normalized": false,
1743
  "rstrip": false,
 
1745
  "special": true
1746
  },
1747
  "128218": {
1748
+ "content": "<|reserved_special_token_210|>",
1749
  "lstrip": false,
1750
  "normalized": false,
1751
  "rstrip": false,
 
1753
  "special": true
1754
  },
1755
  "128219": {
1756
+ "content": "<|reserved_special_token_211|>",
1757
  "lstrip": false,
1758
  "normalized": false,
1759
  "rstrip": false,
 
1761
  "special": true
1762
  },
1763
  "128220": {
1764
+ "content": "<|reserved_special_token_212|>",
1765
  "lstrip": false,
1766
  "normalized": false,
1767
  "rstrip": false,
 
1769
  "special": true
1770
  },
1771
  "128221": {
1772
+ "content": "<|reserved_special_token_213|>",
1773
  "lstrip": false,
1774
  "normalized": false,
1775
  "rstrip": false,
 
1777
  "special": true
1778
  },
1779
  "128222": {
1780
+ "content": "<|reserved_special_token_214|>",
1781
  "lstrip": false,
1782
  "normalized": false,
1783
  "rstrip": false,
 
1785
  "special": true
1786
  },
1787
  "128223": {
1788
+ "content": "<|reserved_special_token_215|>",
1789
  "lstrip": false,
1790
  "normalized": false,
1791
  "rstrip": false,
 
1793
  "special": true
1794
  },
1795
  "128224": {
1796
+ "content": "<|reserved_special_token_216|>",
1797
  "lstrip": false,
1798
  "normalized": false,
1799
  "rstrip": false,
 
1801
  "special": true
1802
  },
1803
  "128225": {
1804
+ "content": "<|reserved_special_token_217|>",
1805
  "lstrip": false,
1806
  "normalized": false,
1807
  "rstrip": false,
 
1809
  "special": true
1810
  },
1811
  "128226": {
1812
+ "content": "<|reserved_special_token_218|>",
1813
  "lstrip": false,
1814
  "normalized": false,
1815
  "rstrip": false,
 
1817
  "special": true
1818
  },
1819
  "128227": {
1820
+ "content": "<|reserved_special_token_219|>",
1821
  "lstrip": false,
1822
  "normalized": false,
1823
  "rstrip": false,
 
1825
  "special": true
1826
  },
1827
  "128228": {
1828
+ "content": "<|reserved_special_token_220|>",
1829
  "lstrip": false,
1830
  "normalized": false,
1831
  "rstrip": false,
 
1833
  "special": true
1834
  },
1835
  "128229": {
1836
+ "content": "<|reserved_special_token_221|>",
1837
  "lstrip": false,
1838
  "normalized": false,
1839
  "rstrip": false,
 
1841
  "special": true
1842
  },
1843
  "128230": {
1844
+ "content": "<|reserved_special_token_222|>",
1845
  "lstrip": false,
1846
  "normalized": false,
1847
  "rstrip": false,
 
1849
  "special": true
1850
  },
1851
  "128231": {
1852
+ "content": "<|reserved_special_token_223|>",
1853
  "lstrip": false,
1854
  "normalized": false,
1855
  "rstrip": false,
 
1857
  "special": true
1858
  },
1859
  "128232": {
1860
+ "content": "<|reserved_special_token_224|>",
1861
  "lstrip": false,
1862
  "normalized": false,
1863
  "rstrip": false,
 
1865
  "special": true
1866
  },
1867
  "128233": {
1868
+ "content": "<|reserved_special_token_225|>",
1869
  "lstrip": false,
1870
  "normalized": false,
1871
  "rstrip": false,
 
1873
  "special": true
1874
  },
1875
  "128234": {
1876
+ "content": "<|reserved_special_token_226|>",
1877
  "lstrip": false,
1878
  "normalized": false,
1879
  "rstrip": false,
 
1881
  "special": true
1882
  },
1883
  "128235": {
1884
+ "content": "<|reserved_special_token_227|>",
1885
  "lstrip": false,
1886
  "normalized": false,
1887
  "rstrip": false,
 
1889
  "special": true
1890
  },
1891
  "128236": {
1892
+ "content": "<|reserved_special_token_228|>",
1893
  "lstrip": false,
1894
  "normalized": false,
1895
  "rstrip": false,
 
1897
  "special": true
1898
  },
1899
  "128237": {
1900
+ "content": "<|reserved_special_token_229|>",
1901
  "lstrip": false,
1902
  "normalized": false,
1903
  "rstrip": false,
 
1905
  "special": true
1906
  },
1907
  "128238": {
1908
+ "content": "<|reserved_special_token_230|>",
1909
  "lstrip": false,
1910
  "normalized": false,
1911
  "rstrip": false,
 
1913
  "special": true
1914
  },
1915
  "128239": {
1916
+ "content": "<|reserved_special_token_231|>",
1917
  "lstrip": false,
1918
  "normalized": false,
1919
  "rstrip": false,
 
1921
  "special": true
1922
  },
1923
  "128240": {
1924
+ "content": "<|reserved_special_token_232|>",
1925
  "lstrip": false,
1926
  "normalized": false,
1927
  "rstrip": false,
 
1929
  "special": true
1930
  },
1931
  "128241": {
1932
+ "content": "<|reserved_special_token_233|>",
1933
  "lstrip": false,
1934
  "normalized": false,
1935
  "rstrip": false,
 
1937
  "special": true
1938
  },
1939
  "128242": {
1940
+ "content": "<|reserved_special_token_234|>",
1941
  "lstrip": false,
1942
  "normalized": false,
1943
  "rstrip": false,
 
1945
  "special": true
1946
  },
1947
  "128243": {
1948
+ "content": "<|reserved_special_token_235|>",
1949
  "lstrip": false,
1950
  "normalized": false,
1951
  "rstrip": false,
 
1953
  "special": true
1954
  },
1955
  "128244": {
1956
+ "content": "<|reserved_special_token_236|>",
1957
  "lstrip": false,
1958
  "normalized": false,
1959
  "rstrip": false,
 
1961
  "special": true
1962
  },
1963
  "128245": {
1964
+ "content": "<|reserved_special_token_237|>",
1965
  "lstrip": false,
1966
  "normalized": false,
1967
  "rstrip": false,
 
1969
  "special": true
1970
  },
1971
  "128246": {
1972
+ "content": "<|reserved_special_token_238|>",
1973
  "lstrip": false,
1974
  "normalized": false,
1975
  "rstrip": false,
 
1977
  "special": true
1978
  },
1979
  "128247": {
1980
+ "content": "<|reserved_special_token_239|>",
1981
  "lstrip": false,
1982
  "normalized": false,
1983
  "rstrip": false,
 
1985
  "special": true
1986
  },
1987
  "128248": {
1988
+ "content": "<|reserved_special_token_240|>",
1989
  "lstrip": false,
1990
  "normalized": false,
1991
  "rstrip": false,
 
1993
  "special": true
1994
  },
1995
  "128249": {
1996
+ "content": "<|reserved_special_token_241|>",
1997
  "lstrip": false,
1998
  "normalized": false,
1999
  "rstrip": false,
 
2001
  "special": true
2002
  },
2003
  "128250": {
2004
+ "content": "<|reserved_special_token_242|>",
2005
  "lstrip": false,
2006
  "normalized": false,
2007
  "rstrip": false,
 
2009
  "special": true
2010
  },
2011
  "128251": {
2012
+ "content": "<|reserved_special_token_243|>",
2013
  "lstrip": false,
2014
  "normalized": false,
2015
  "rstrip": false,
 
2017
  "special": true
2018
  },
2019
  "128252": {
2020
+ "content": "<|reserved_special_token_244|>",
2021
  "lstrip": false,
2022
  "normalized": false,
2023
  "rstrip": false,
 
2025
  "special": true
2026
  },
2027
  "128253": {
2028
+ "content": "<|reserved_special_token_245|>",
2029
  "lstrip": false,
2030
  "normalized": false,
2031
  "rstrip": false,
 
2033
  "special": true
2034
  },
2035
  "128254": {
2036
+ "content": "<|reserved_special_token_246|>",
2037
  "lstrip": false,
2038
  "normalized": false,
2039
  "rstrip": false,
 
2041
  "special": true
2042
  },
2043
  "128255": {
2044
+ "content": "<|reserved_special_token_247|>",
2045
  "lstrip": false,
2046
  "normalized": false,
2047
  "rstrip": false,
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
  "extra_special_tokens": {},
 
2058
  "input_ids",
2059
  "attention_mask"
2060
  ],
2061
+ "model_max_length": 131072,
2062
+ "pad_token": "<|finetune_right_pad_id|>",
2063
  "tokenizer_class": "PreTrainedTokenizerFast"
2064
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dd834175a287d18ac573bf7692c0748daf530ca2617b3815794efadbe38366d
3
- size 9016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc9fabf5284190749103c589d3582af6efc98a3f8ba5436ee09b8a2fb8018761
3
+ size 8184