| 01/18/2024 19:24:50 - WARNING - llmtuner.model.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training. | |
| [INFO|training_args.py:1838] 2024-01-18 19:24:50,331 >> PyTorch: setting up devices | |
| /home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/transformers/training_args.py:1751: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of π€ Transformers. Use `--hub_token` instead. | |
| warnings.warn( | |
| 01/18/2024 19:24:50 - INFO - llmtuner.model.parser - Process rank: 0, device: cuda:0, n_gpu: 1 | |
| distributed training: True, compute dtype: None | |
| 01/18/2024 19:24:50 - INFO - llmtuner.model.parser - Training/evaluation parameters Seq2SeqTrainingArguments( | |
| _n_gpu=1, | |
| adafactor=False, | |
| adam_beta1=0.9, | |
| adam_beta2=0.999, | |
| adam_epsilon=1e-08, | |
| auto_find_batch_size=False, | |
| bf16=False, | |
| bf16_full_eval=False, | |
| data_seed=None, | |
| dataloader_drop_last=False, | |
| dataloader_num_workers=0, | |
| dataloader_persistent_workers=False, | |
| dataloader_pin_memory=True, | |
| ddp_backend=None, | |
| ddp_broadcast_buffers=None, | |
| ddp_bucket_cap_mb=None, | |
| ddp_find_unused_parameters=False, | |
| ddp_timeout=1800, | |
| debug=[], | |
| deepspeed=None, | |
| disable_tqdm=False, | |
| dispatch_batches=None, | |
| do_eval=False, | |
| do_predict=True, | |
| do_train=False, | |
| eval_accumulation_steps=None, | |
| eval_delay=0, | |
| eval_steps=None, | |
| evaluation_strategy=IntervalStrategy.NO, | |
| fp16=False, | |
| fp16_backend=auto, | |
| fp16_full_eval=False, | |
| fp16_opt_level=O1, | |
| fsdp=[], | |
| fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, | |
| fsdp_min_num_params=0, | |
| fsdp_transformer_layer_cls_to_wrap=None, | |
| full_determinism=False, | |
| generation_config=None, | |
| generation_max_length=None, | |
| generation_num_beams=None, | |
| gradient_accumulation_steps=1, | |
| gradient_checkpointing=False, | |
| gradient_checkpointing_kwargs=None, | |
| greater_is_better=None, | |
| group_by_length=False, | |
| half_precision_backend=auto, | |
| hub_always_push=False, | |
| hub_model_id=None, | |
| hub_private_repo=False, | |
| hub_strategy=HubStrategy.EVERY_SAVE, | |
| hub_token=<HUB_TOKEN>, | |
| ignore_data_skip=False, | |
| include_inputs_for_metrics=False, | |
| include_num_input_tokens_seen=False, | |
| include_tokens_per_second=False, | |
| jit_mode_eval=False, | |
| label_names=None, | |
| label_smoothing_factor=0.0, | |
| learning_rate=5e-05, | |
| length_column_name=length, | |
| load_best_model_at_end=False, | |
| local_rank=0, | |
| log_level=passive, | |
| log_level_replica=warning, | |
| log_on_each_node=True, | |
| logging_dir=./models/sft/LMCocktail-10.7B-v1-sft-glaive-function-calling-v2-ep1-lora/Predict_20/runs/Jan18_19-24-50_yhyu13fuwuqi, | |
| logging_first_step=False, | |
| logging_nan_inf_filter=True, | |
| logging_steps=500, | |
| logging_strategy=IntervalStrategy.STEPS, | |
| lr_scheduler_kwargs={}, | |
| lr_scheduler_type=SchedulerType.LINEAR, | |
| max_grad_norm=1.0, | |
| max_steps=-1, | |
| metric_for_best_model=None, | |
| mp_parameters=, | |
| neftune_noise_alpha=None, | |
| no_cuda=False, | |
| num_train_epochs=3.0, | |
| optim=OptimizerNames.ADAMW_TORCH, | |
| optim_args=None, | |
| output_dir=./models/sft/LMCocktail-10.7B-v1-sft-glaive-function-calling-v2-ep1-lora/Predict_20, | |
| overwrite_output_dir=False, | |
| past_index=-1, | |
| per_device_eval_batch_size=1, | |
| per_device_train_batch_size=8, | |
| predict_with_generate=True, | |
| prediction_loss_only=False, | |
| push_to_hub=False, | |
| push_to_hub_model_id=None, | |
| push_to_hub_organization=None, | |
| push_to_hub_token=<PUSH_TO_HUB_TOKEN>, | |
| ray_scope=last, | |
| remove_unused_columns=True, | |
| report_to=['tensorboard'], | |
| resume_from_checkpoint=None, | |
| run_name=./models/sft/LMCocktail-10.7B-v1-sft-glaive-function-calling-v2-ep1-lora/Predict_20, | |
| save_on_each_node=False, | |
| save_only_model=False, | |
| save_safetensors=True, | |
| save_steps=500, | |
| save_strategy=IntervalStrategy.STEPS, | |
| save_total_limit=None, | |
| seed=42, | |
| skip_memory_metrics=True, | |
| sortish_sampler=False, | |
| split_batches=False, | |
| tf32=None, | |
| torch_compile=False, | |
| torch_compile_backend=None, | |
| torch_compile_mode=None, | |
| torchdynamo=None, | |
| tpu_metrics_debug=False, | |
| tpu_num_cores=None, | |
| use_cpu=False, | |
| use_ipex=False, | |
| use_legacy_prediction_loop=False, | |
| use_mps_device=False, | |
| warmup_ratio=0.0, | |
| warmup_steps=0, | |
| weight_decay=0.0, | |
| ) | |
| 01/18/2024 19:24:50 - INFO - llmtuner.data.loader - Loading dataset ./glaive-function-calling-v2-llama-factory-convert/simple-function-calling-v2_converted_2000.json... | |
| 01/18/2024 19:24:50 - WARNING - llmtuner.data.utils - Checksum failed: missing SHA-1 hash value in dataset_info.json. | |
| Using custom data configuration default-cb85ddec01d455d4 | |
| Loading Dataset Infos from /home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/datasets/packaged_modules/json | |
| Overwrite dataset info from restored data version if exists. | |
| Loading Dataset info from /home/hangyu5/.cache/huggingface/datasets/json/default-cb85ddec01d455d4/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 | |
| Found cached dataset json (/home/hangyu5/.cache/huggingface/datasets/json/default-cb85ddec01d455d4/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) | |
| Loading Dataset info from /home/hangyu5/.cache/huggingface/datasets/json/default-cb85ddec01d455d4/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 | |
| [INFO|tokenization_utils_base.py:2024] 2024-01-18 19:24:51,385 >> loading file tokenizer.model | |
| [INFO|tokenization_utils_base.py:2024] 2024-01-18 19:24:51,385 >> loading file added_tokens.json | |
| [INFO|tokenization_utils_base.py:2024] 2024-01-18 19:24:51,385 >> loading file special_tokens_map.json | |
| [INFO|tokenization_utils_base.py:2024] 2024-01-18 19:24:51,385 >> loading file tokenizer_config.json | |
| [INFO|tokenization_utils_base.py:2024] 2024-01-18 19:24:51,385 >> loading file tokenizer.json | |
| [INFO|configuration_utils.py:737] 2024-01-18 19:24:51,427 >> loading configuration file Yhyu13/LMCocktail-10.7B-v1/config.json | |
| [INFO|configuration_utils.py:802] 2024-01-18 19:24:51,428 >> Model config LlamaConfig { | |
| "_name_or_path": "Yhyu13/LMCocktail-10.7B-v1", | |
| "architectures": [ | |
| "LlamaForCausalLM" | |
| ], | |
| "attention_bias": false, | |
| "attention_dropout": 0.0, | |
| "bos_token_id": 1, | |
| "eos_token_id": 2, | |
| "hidden_act": "silu", | |
| "hidden_size": 4096, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 14336, | |
| "max_position_embeddings": 4096, | |
| "model_type": "llama", | |
| "num_attention_heads": 32, | |
| "num_hidden_layers": 48, | |
| "num_key_value_heads": 8, | |
| "pad_token_id": 2, | |
| "pretraining_tp": 1, | |
| "rms_norm_eps": 1e-05, | |
| "rope_scaling": null, | |
| "rope_theta": 10000.0, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "float16", | |
| "transformers_version": "4.36.2", | |
| "use_cache": true, | |
| "vocab_size": 32000 | |
| } | |
| [INFO|modeling_utils.py:3341] 2024-01-18 19:24:51,444 >> loading weights file Yhyu13/LMCocktail-10.7B-v1/model.safetensors.index.json | |
| [INFO|modeling_utils.py:1341] 2024-01-18 19:24:51,444 >> Instantiating LlamaForCausalLM model under default dtype torch.float16. | |
| [INFO|configuration_utils.py:826] 2024-01-18 19:24:51,445 >> Generate config GenerationConfig { | |
| "bos_token_id": 1, | |
| "eos_token_id": 2, | |
| "pad_token_id": 2 | |
| } | |
| Loading checkpoint shards: 0%| | 0/5 [00:00<?, ?it/s] | |
| Loading checkpoint shards: 20%|ββ | 1/5 [00:00<00:00, 6.36it/s] | |
| Loading checkpoint shards: 40%|ββββ | 2/5 [00:00<00:00, 6.36it/s]Yhyu13/LMCocktail-10.7B-v1 | |
| Loading checkpoint shards: 60%|ββββββ | 3/5 [00:00<00:00, 6.36it/s] | |
| Loading checkpoint shards: 80%|ββββββββ | 4/5 [00:00<00:00, 6.36it/s]Yhyu13/LMCocktail-10.7B-v1 | |
| Loading checkpoint shards: 100%|ββββββββββ| 5/5 [00:00<00:00, 6.42it/s] | |
| Loading checkpoint shards: 100%|ββββββββββ| 5/5 [00:00<00:00, 6.39it/s] | |
| [INFO|modeling_utils.py:4185] 2024-01-18 19:24:52,397 >> All model checkpoint weights were used when initializing LlamaForCausalLM. | |
| [INFO|modeling_utils.py:4193] 2024-01-18 19:24:52,397 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at ./models/LMCocktail-10.7B-v1. | |
| If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. | |
| [INFO|configuration_utils.py:779] 2024-01-18 19:24:52,400 >> loading configuration file ./models/LMCocktail-10.7B-v1/generation_config.json | |
| [INFO|configuration_utils.py:826] 2024-01-18 19:24:52,400 >> Generate config GenerationConfig { | |
| "bos_token_id": 1, | |
| "eos_token_id": 2, | |
| "pad_token_id": 2, | |
| "use_cache": false | |
| } | |
| 01/18/2024 19:24:52 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA | |
| 01/18/2024 19:24:54 - INFO - llmtuner.model.adapter - Merged 1 adapter(s). | |
| 01/18/2024 19:24:54 - INFO - llmtuner.model.adapter - Loaded adapter(s): ./models/sft/LMCocktail-10.7B-v1-sft-glaive-function-calling-v2-ep1-lora | |
| 01/18/2024 19:24:54 - INFO - llmtuner.model.loader - trainable params: 0 || all params: 10731524096 || trainable%: 0.0000 | |
| 01/18/2024 19:24:54 - INFO - llmtuner.model.loader - This IS expected that the trainable params is 0 if you are using model for inference only. | |
| Running tokenizer on dataset: 0%| | 0/20 [00:00<?, ? examples/s]Caching processed dataset at /home/hangyu5/.cache/huggingface/datasets/json/default-cb85ddec01d455d4/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-700bf363697824f9.arrow | |
| Running tokenizer on dataset: 100%|ββββββββββ| 20/20 [00:00<00:00, 529.06 examples/s] | |
| [INFO|training_args.py:1838] 2024-01-18 19:24:54,939 >> PyTorch: setting up devices | |
| Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. | |
| [INFO|trainer.py:3166] 2024-01-18 19:24:57,618 >> ***** Running Prediction ***** | |
| [INFO|trainer.py:3168] 2024-01-18 19:24:57,618 >> Num examples = 20 | |
| [INFO|trainer.py:3171] 2024-01-18 19:24:57,618 >> Batch size = 1 | |
| [INFO|configuration_utils.py:826] 2024-01-18 19:24:57,631 >> Generate config GenerationConfig { | |
| "bos_token_id": 1, | |
| "eos_token_id": 2, | |
| "pad_token_id": 2 | |
| } | |
| /home/hangyu5/anaconda3/envs/llama_factory/lib/python3.11/site-packages/transformers/generation/utils.py:1518: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration ) | |
| warnings.warn( | |
| input_ids: | |
| [1, 774, 1247, 28747, 13, 27842, 28747, 995, 460, 264, 10865, 13892, 395, 2735, 298, 272, 2296, 5572, 28723, 5938, 706, 513, 3030, 387, 13, 28751, 13, 2287, 345, 861, 1264, 345, 527, 28730, 720, 4078, 28730, 6036, 548, 13, 2287, 345, 6518, 1264, 345, 1458, 272, 8877, 4338, 1444, 989, 1191, 951, 20023, 548, 13, 2287, 345, 11438, 1264, 371, 13, 5390, 345, 1123, 1264, 345, 2814, 548, 13, 5390, 345, 10723, 1264, 371, 13, 17422, 345, 2893, 28730, 16714, 1264, 371, 13, 1417, 28705, 345, 1123, 1264, 345, 1427, 548, 13, 1417, 28705, 345, 6518, 1264, 345, 1014, 15547, 298, 6603, 477, 28739, 13, 17422, 1630, 13, 17422, 345, 3731, 28730, 16714, 1264, 371, 13, 1417, 28705, 345, 1123, 1264, 345, 1427, 548, 13, 1417, 28705, 345, 6518, 1264, 345, 1014, 15547, 298, 6603, 298, 28739, 13, 17422, 443, 13, 5390, 1630, 13, 5390, 345, 10893, 1264, 733, 13, 17422, 345, 2893, 28730, 16714, 548, 13, 17422, 345, 3731, 28730, 16714, 28739, 13, 5390, 4709, 13, 2287, 443, 13, 28752, 13, 13, 6325, 368, 1820, 264, 9314, 354, 528, 477, 1450, 2726, 298, 4222, 28804, 13, 13, 27332, 21631, 28747, 13] | |
| inputs: | |
| <s>### User: | |
| SYSTEM: You are a helpful assistant with access to the following functions. Use them if required - | |
| { | |
| "name": "get_exchange_rate", | |
| "description": "Get the exchange rate between two currencies", | |
| "parameters": { | |
| "type": "object", | |
| "properties": { | |
| "base_currency": { | |
| "type": "string", | |
| "description": "The currency to convert from" | |
| }, | |
| "target_currency": { | |
| "type": "string", | |
| "description": "The currency to convert to" | |
| } | |
| }, | |
| "required": [ | |
| "base_currency", | |
| "target_currency" | |
| ] | |
| } | |
| } | |
| Can you book a flight for me from New York to London? | |
| ### Assistant: | |
| 0%| | 0/20 [00:00<?, ?it/s] | |
| 10%|β | 2/20 [00:01<00:13, 1.31it/s] | |
| 15%|ββ | 3/20 [00:04<00:26, 1.56s/it] | |
| 20%|ββ | 4/20 [00:05<00:25, 1.60s/it] | |
| 25%|βββ | 5/20 [00:09<00:32, 2.16s/it] | |
| 30%|βββ | 6/20 [00:10<00:26, 1.87s/it] | |
| 35%|ββββ | 7/20 [00:12<00:24, 1.87s/it] | |
| 40%|ββββ | 8/20 [00:13<00:21, 1.79s/it] | |
| 45%|βββββ | 9/20 [00:15<00:17, 1.62s/it] | |
| 50%|βββββ | 10/20 [00:17<00:18, 1.81s/it] | |
| 55%|ββββββ | 11/20 [00:18<00:15, 1.75s/it] | |
| 60%|ββββββ | 12/20 [00:19<00:12, 1.51s/it] | |
| 65%|βββββββ | 13/20 [00:22<00:12, 1.79s/it] | |
| 70%|βββββββ | 14/20 [00:23<00:08, 1.50s/it] | |
| 75%|ββββββββ | 15/20 [00:26<00:10, 2.07s/it] | |
| 80%|ββββββββ | 16/20 [00:27<00:06, 1.74s/it] | |
| 85%|βββββββββ | 17/20 [00:29<00:05, 1.79s/it] | |
| 90%|βββββββββ | 18/20 [00:31<00:03, 1.99s/it] | |
| 95%|ββββββββββ| 19/20 [00:32<00:01, 1.63s/it] | |
| 100%|ββββββββββ| 20/20 [00:34<00:00, 1.72s/it]Building prefix dict from the default dictionary ... | |
| Loading model from cache /tmp/jieba.cache | |
| Loading model cost 0.675 seconds. | |
| Prefix dict has been built successfully. | |
| 100%|ββββββββββ| 20/20 [00:35<00:00, 1.77s/it] | |
| ***** predict metrics ***** | |
| predict_bleu-4 = 84.0251 | |
| predict_rouge-1 = 88.6553 | |
| predict_rouge-2 = 80.2374 | |
| predict_rouge-l = 86.4698 | |
| predict_runtime = 0:00:37.47 | |
| predict_samples_per_second = 0.534 | |
| predict_steps_per_second = 0.534 | |
| 01/18/2024 19:25:35 - INFO - llmtuner.train.sft.trainer - Saving prediction results to ./models/sft/LMCocktail-10.7B-v1-sft-glaive-function-calling-v2-ep1-lora/Predict_20/generated_predictions.jsonl | |