| [WARNING|2025-02-04 05:46:33] logging.py:162 >> We recommend enable `upcast_layernorm` in quantized training. |
|
|
| [INFO|2025-02-04 05:46:33] parser.py:355 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16 |
|
|
| [INFO|2025-02-04 05:46:33] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/config.json |
|
|
| [INFO|2025-02-04 05:46:33] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:33] tokenization_utils_base.py:2211 >> loading file tokenizer.model from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.model |
|
|
| [INFO|2025-02-04 05:46:33] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.json |
|
|
| [INFO|2025-02-04 05:46:33] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None |
|
|
| [INFO|2025-02-04 05:46:33] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/special_tokens_map.json |
|
|
| [INFO|2025-02-04 05:46:33] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer_config.json |
|
|
| [INFO|2025-02-04 05:46:34] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/config.json |
|
|
| [INFO|2025-02-04 05:46:34] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:34] tokenization_utils_base.py:2211 >> loading file tokenizer.model from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.model |
|
|
| [INFO|2025-02-04 05:46:34] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.json |
|
|
| [INFO|2025-02-04 05:46:34] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None |
|
|
| [INFO|2025-02-04 05:46:34] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/special_tokens_map.json |
|
|
| [INFO|2025-02-04 05:46:34] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer_config.json |
|
|
| [INFO|2025-02-04 05:46:34] logging.py:157 >> Add pad token: </s> |
|
|
| [INFO|2025-02-04 05:46:34] logging.py:157 >> Loading dataset train_new.json... |
|
|
| [INFO|2025-02-04 05:46:36] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/config.json |
|
|
| [INFO|2025-02-04 05:46:36] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:36] logging.py:157 >> Quantizing model to 4 bit with bitsandbytes. |
|
|
| [INFO|2025-02-04 05:46:37] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json |
|
|
| [INFO|2025-02-04 05:46:37] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "pad_token_id": 770, |
| "quantization_config": { |
| "_load_in_4bit": true, |
| "_load_in_8bit": false, |
| "bnb_4bit_compute_dtype": "bfloat16", |
| "bnb_4bit_quant_storage": "uint8", |
| "bnb_4bit_quant_type": "nf4", |
| "bnb_4bit_use_double_quant": true, |
| "llm_int8_enable_fp32_cpu_offload": false, |
| "llm_int8_has_fp16_weight": false, |
| "llm_int8_skip_modules": null, |
| "llm_int8_threshold": 6.0, |
| "load_in_4bit": true, |
| "load_in_8bit": false, |
| "quant_method": "bitsandbytes" |
| }, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "unsloth_version": "2024.9", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--aws/snapshots/66e4c14a24a0b445779c922eef992a4af0694a88/config.json |
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--repeat/snapshots/7c48478c02f84ed89f149b0815cc0216ee831fb0/config.json |
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--vram-24/snapshots/61324ceeacd75b2b31f7a789a9c9d82058e6118c/config.json |
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/config.json |
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json |
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "pad_token_id": 770, |
| "quantization_config": { |
| "_load_in_4bit": true, |
| "_load_in_8bit": false, |
| "bnb_4bit_compute_dtype": "bfloat16", |
| "bnb_4bit_quant_storage": "uint8", |
| "bnb_4bit_quant_type": "nf4", |
| "bnb_4bit_use_double_quant": true, |
| "llm_int8_enable_fp32_cpu_offload": false, |
| "llm_int8_has_fp16_weight": false, |
| "llm_int8_skip_modules": null, |
| "llm_int8_threshold": 6.0, |
| "load_in_4bit": true, |
| "load_in_8bit": false, |
| "quant_method": "bitsandbytes" |
| }, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "unsloth_version": "2024.9", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json |
|
|
| [INFO|2025-02-04 05:46:38] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "pad_token_id": 770, |
| "quantization_config": { |
| "_load_in_4bit": true, |
| "_load_in_8bit": false, |
| "bnb_4bit_compute_dtype": "bfloat16", |
| "bnb_4bit_quant_storage": "uint8", |
| "bnb_4bit_quant_type": "nf4", |
| "bnb_4bit_use_double_quant": true, |
| "llm_int8_enable_fp32_cpu_offload": false, |
| "llm_int8_has_fp16_weight": false, |
| "llm_int8_skip_modules": null, |
| "llm_int8_threshold": 6.0, |
| "load_in_4bit": true, |
| "load_in_8bit": false, |
| "quant_method": "bitsandbytes" |
| }, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "unsloth_version": "2024.9", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:39] modeling_utils.py:3937 >> loading weights file model.safetensors from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/model.safetensors |
|
|
| [INFO|2025-02-04 05:46:39] modeling_utils.py:1670 >> Instantiating MistralForCausalLM model under default dtype torch.bfloat16. |
|
|
| [INFO|2025-02-04 05:46:39] configuration_utils.py:1096 >> Generate config GenerationConfig { |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "pad_token_id": 770 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:41] modeling_utils.py:4800 >> All model checkpoint weights were used when initializing MistralForCausalLM. |
|
|
|
|
| [INFO|2025-02-04 05:46:41] modeling_utils.py:4808 >> All the weights of MistralForCausalLM were initialized from the model checkpoint at unsloth/mistral-7b-instruct-v0.3-bnb-4bit. |
| If your task is similar to the task the model of the checkpoint was trained on, you can already use MistralForCausalLM for predictions without further training. |
|
|
| [INFO|2025-02-04 05:46:41] configuration_utils.py:1051 >> loading configuration file generation_config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/generation_config.json |
|
|
| [INFO|2025-02-04 05:46:41] configuration_utils.py:1096 >> Generate config GenerationConfig { |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "max_length": 32768, |
| "pad_token_id": 770 |
| } |
|
|
|
|
| [INFO|2025-02-04 05:46:42] logging.py:157 >> Gradient checkpointing enabled. |
|
|
| [INFO|2025-02-04 05:46:42] logging.py:157 >> Upcasting trainable params to float32. |
|
|
| [INFO|2025-02-04 05:46:42] logging.py:157 >> Fine-tuning method: LoRA |
|
|
| [INFO|2025-02-04 05:46:42] logging.py:157 >> Found linear modules: v_proj,up_proj,q_proj,down_proj,k_proj,o_proj,gate_proj |
|
|
| [WARNING|2025-02-04 05:46:44] logging.py:168 >> Unsloth 2025.1.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers. |
|
|
| [INFO|2025-02-04 05:46:45] logging.py:157 >> trainable params: 20,971,520 || all params: 7,268,995,072 || trainable%: 0.2885 |
|
|
| [INFO|2025-02-04 05:46:45] trainer.py:698 >> Using auto half precision backend |
|
|
| [WARNING|2025-02-04 05:46:45] <string>:208 >> ==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1 |
| \\ /| Num examples = 3,716 | Num Epochs = 3 |
| O^O/ \_/ \ Batch size per device = 8 | Gradient Accumulation steps = 4 |
| \ / Total batch size = 32 | Total steps = 348 |
| "-____-" Number of trainable parameters = 20,971,520 |
|
|
| [WARNING|2025-02-04 05:46:46] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:46:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:47:08] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:47:18] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:47:28] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:47:38] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:47:48] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:47:59] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:48:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:48:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 05:48:29] logging.py:157 >> {'loss': 0.2134, 'learning_rate': 2.9939e-05, 'epoch': 0.09} |
|
|
| [WARNING|2025-02-04 05:48:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:48:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:48:50] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:49:00] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:49:10] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:49:20] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:49:30] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:49:41] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:49:51] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:50:00] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 05:50:11] logging.py:157 >> {'loss': 0.0692, 'learning_rate': 2.9756e-05, 'epoch': 0.17} |
|
|
| [WARNING|2025-02-04 05:50:11] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:50:21] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:50:31] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:50:41] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:50:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:51:03] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:51:13] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:51:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:51:34] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:51:44] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 05:51:54] logging.py:157 >> {'loss': 0.0594, 'learning_rate': 2.9453e-05, 'epoch': 0.26} |
|
|
| [WARNING|2025-02-04 05:51:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:52:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:52:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:52:24] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:52:34] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:52:45] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:52:55] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:53:06] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:53:17] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:53:27] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 05:53:37] logging.py:157 >> {'loss': 0.0546, 'learning_rate': 2.9033e-05, 'epoch': 0.34} |
|
|
| [WARNING|2025-02-04 05:53:37] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:53:47] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:53:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:54:07] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:54:17] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:54:27] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:54:38] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:54:48] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:54:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:55:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 05:55:19] logging.py:157 >> {'loss': 0.0511, 'learning_rate': 2.8498e-05, 'epoch': 0.43} |
|
|
| [WARNING|2025-02-04 05:55:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:55:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:55:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:55:50] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:56:00] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:56:10] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:56:20] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:56:30] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:56:40] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:56:51] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 05:57:01] logging.py:157 >> {'loss': 0.0516, 'learning_rate': 2.7853e-05, 'epoch': 0.52} |
|
|
| [WARNING|2025-02-04 05:57:01] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:57:11] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:57:21] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:57:31] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:57:42] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:57:52] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:58:03] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:58:13] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:58:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:58:34] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 05:58:44] logging.py:157 >> {'loss': 0.0464, 'learning_rate': 2.7103e-05, 'epoch': 0.60} |
|
|
| [WARNING|2025-02-04 05:58:44] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:58:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:59:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:59:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:59:25] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:59:35] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:59:45] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 05:59:55] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:00:05] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:00:16] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:00:26] logging.py:157 >> {'loss': 0.0450, 'learning_rate': 2.6255e-05, 'epoch': 0.69} |
|
|
| [WARNING|2025-02-04 06:00:26] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:00:37] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:00:47] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:00:57] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:01:07] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:01:18] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:01:28] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:01:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:01:49] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:01:59] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:02:10] logging.py:157 >> {'loss': 0.0478, 'learning_rate': 2.5315e-05, 'epoch': 0.77} |
|
|
| [WARNING|2025-02-04 06:02:10] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:02:21] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:02:31] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:02:42] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:02:52] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:03:02] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:03:12] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:03:22] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:03:32] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:03:42] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:03:52] logging.py:157 >> {'loss': 0.0397, 'learning_rate': 2.4292e-05, 'epoch': 0.86} |
|
|
| [WARNING|2025-02-04 06:03:52] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:04:02] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:04:12] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:04:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:04:33] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:04:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:04:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:05:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:05:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:05:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:05:33] logging.py:157 >> {'loss': 0.0362, 'learning_rate': 2.3192e-05, 'epoch': 0.95} |
|
|
| [WARNING|2025-02-04 06:05:33] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:05:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:05:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:06:03] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:06:13] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:06:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:06:34] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:06:35] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:06:45] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:06:56] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:07:06] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:07:14] logging.py:157 >> {'loss': 0.0428, 'learning_rate': 2.2026e-05, 'epoch': 1.03} |
|
|
| [WARNING|2025-02-04 06:07:16] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:07:27] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:07:37] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:07:47] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:07:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:08:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:08:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:08:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:08:40] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:08:50] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:08:58] logging.py:157 >> {'loss': 0.0334, 'learning_rate': 2.0803e-05, 'epoch': 1.12} |
|
|
| [WARNING|2025-02-04 06:09:00] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:09:10] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:09:20] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:09:30] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:09:40] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:09:51] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:10:02] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:10:12] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:10:22] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:10:32] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:10:40] logging.py:157 >> {'loss': 0.0350, 'learning_rate': 1.9532e-05, 'epoch': 1.20} |
|
|
| [WARNING|2025-02-04 06:10:42] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:10:52] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:11:02] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:11:12] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:11:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:11:33] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:11:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:11:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:12:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:12:13] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:12:21] logging.py:157 >> {'loss': 0.0352, 'learning_rate': 1.8225e-05, 'epoch': 1.29} |
|
|
| [WARNING|2025-02-04 06:12:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:12:34] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:12:44] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:12:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:13:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:13:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:13:24] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:13:34] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:13:44] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:13:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:14:02] logging.py:157 >> {'loss': 0.0364, 'learning_rate': 1.6891e-05, 'epoch': 1.38} |
|
|
| [WARNING|2025-02-04 06:14:05] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:14:15] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:14:25] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:14:35] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:14:45] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:14:55] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:15:06] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:15:16] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:15:27] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:15:37] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:15:45] logging.py:157 >> {'loss': 0.0332, 'learning_rate': 1.5542e-05, 'epoch': 1.46} |
|
|
| [WARNING|2025-02-04 06:15:48] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:15:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:16:08] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:16:18] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:16:28] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:16:38] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:16:48] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:16:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:17:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:17:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:17:27] logging.py:157 >> {'loss': 0.0315, 'learning_rate': 1.4188e-05, 'epoch': 1.55} |
|
|
| [WARNING|2025-02-04 06:17:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:17:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:17:49] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:17:59] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:18:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:18:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:18:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:18:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:18:50] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:19:00] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:19:08] logging.py:157 >> {'loss': 0.0312, 'learning_rate': 1.2841e-05, 'epoch': 1.63} |
|
|
| [WARNING|2025-02-04 06:19:10] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:19:21] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:19:31] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:19:42] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:19:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:20:03] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:20:13] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:20:22] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:20:32] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:20:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:20:51] logging.py:157 >> {'loss': 0.0324, 'learning_rate': 1.1511e-05, 'epoch': 1.72} |
|
|
| [WARNING|2025-02-04 06:20:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:21:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:21:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:21:25] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:21:35] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:21:45] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:21:55] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:22:05] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:22:15] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:22:26] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:22:34] logging.py:157 >> {'loss': 0.0326, 'learning_rate': 1.0210e-05, 'epoch': 1.81} |
|
|
| [WARNING|2025-02-04 06:22:37] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:22:47] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:22:57] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:23:07] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:23:18] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:23:28] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:23:38] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:23:48] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:23:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:24:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:24:16] logging.py:157 >> {'loss': 0.0319, 'learning_rate': 8.9485e-06, 'epoch': 1.89} |
|
|
| [WARNING|2025-02-04 06:24:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:24:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:24:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:24:49] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:24:59] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:25:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:25:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:25:30] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:25:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:25:50] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:25:58] logging.py:157 >> {'loss': 0.0319, 'learning_rate': 7.7358e-06, 'epoch': 1.98} |
|
|
| [WARNING|2025-02-04 06:26:00] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:26:10] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:26:21] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:26:22] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:26:33] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:26:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:26:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:27:03] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:27:13] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:27:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:27:34] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:27:39] logging.py:157 >> {'loss': 0.0247, 'learning_rate': 6.5822e-06, 'epoch': 2.06} |
|
|
| [WARNING|2025-02-04 06:27:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:27:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:28:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:28:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:28:25] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:28:35] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:28:45] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:28:55] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:29:06] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:29:17] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:29:22] logging.py:157 >> {'loss': 0.0276, 'learning_rate': 5.4972e-06, 'epoch': 2.15} |
|
|
| [WARNING|2025-02-04 06:29:27] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:29:37] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:29:49] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:29:59] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:30:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:30:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:30:30] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:30:40] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:30:50] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:31:01] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:31:07] logging.py:157 >> {'loss': 0.0259, 'learning_rate': 4.4896e-06, 'epoch': 2.24} |
|
|
| [WARNING|2025-02-04 06:31:12] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:31:22] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:31:32] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:31:42] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:31:52] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:32:02] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:32:12] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:32:22] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:32:32] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:32:42] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:32:47] logging.py:157 >> {'loss': 0.0213, 'learning_rate': 3.5676e-06, 'epoch': 2.32} |
|
|
| [WARNING|2025-02-04 06:32:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:33:03] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:33:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:33:24] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:33:35] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:33:44] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:33:55] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:34:06] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:34:16] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:34:27] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:34:32] logging.py:157 >> {'loss': 0.0224, 'learning_rate': 2.7387e-06, 'epoch': 2.41} |
|
|
| [WARNING|2025-02-04 06:34:37] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:34:47] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:34:57] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:35:07] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:35:18] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:35:28] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:35:38] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:35:49] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:35:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:36:08] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:36:13] logging.py:157 >> {'loss': 0.0253, 'learning_rate': 2.0096e-06, 'epoch': 2.49} |
|
|
| [WARNING|2025-02-04 06:36:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:36:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:36:39] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:36:49] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:36:59] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:37:10] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:37:20] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:37:31] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:37:41] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:37:51] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:37:56] logging.py:157 >> {'loss': 0.0241, 'learning_rate': 1.3864e-06, 'epoch': 2.58} |
|
|
| [WARNING|2025-02-04 06:38:01] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:38:11] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:38:22] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:38:32] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:38:41] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:38:51] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:39:02] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:39:12] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:39:23] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:39:33] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:39:38] logging.py:157 >> {'loss': 0.0268, 'learning_rate': 8.7399e-07, 'epoch': 2.67} |
|
|
| [WARNING|2025-02-04 06:39:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:39:53] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:40:03] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:40:13] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:40:24] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:40:33] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:40:43] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:40:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:41:04] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:41:14] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:41:20] logging.py:157 >> {'loss': 0.0222, 'learning_rate': 4.7666e-07, 'epoch': 2.75} |
|
|
| [WARNING|2025-02-04 06:41:25] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:41:35] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:41:45] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:41:54] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:42:05] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:42:16] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:42:26] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:42:36] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:42:46] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:42:57] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:43:02] logging.py:157 >> {'loss': 0.0265, 'learning_rate': 1.9760e-07, 'epoch': 2.84} |
|
|
| [WARNING|2025-02-04 06:43:07] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:43:17] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:43:27] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:43:38] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:43:48] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:43:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:44:08] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:44:18] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:44:28] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:44:38] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:44:43] logging.py:157 >> {'loss': 0.0242, 'learning_rate': 3.9102e-08, 'epoch': 2.92} |
|
|
| [WARNING|2025-02-04 06:44:48] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:44:58] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:45:09] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:45:19] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:45:29] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:45:40] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:45:50] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [WARNING|2025-02-04 06:46:01] logging.py:168 >> 'Seq2SeqTrainingArguments' object has no attribute 'average_tokens_across_devices' |
|
|
| [INFO|2025-02-04 06:46:06] trainer.py:3801 >> Saving model checkpoint to saves/Mistral-7B-Instruct-v0.3/lora/mistral_7b_trained/checkpoint-348 |
|
|
| [INFO|2025-02-04 06:46:06] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json |
|
|
| [INFO|2025-02-04 06:46:06] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "unsloth/Mistral-7B-Instruct-v0.3", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "pad_token_id": 770, |
| "quantization_config": { |
| "_load_in_4bit": true, |
| "_load_in_8bit": false, |
| "bnb_4bit_compute_dtype": "bfloat16", |
| "bnb_4bit_quant_storage": "uint8", |
| "bnb_4bit_quant_type": "nf4", |
| "bnb_4bit_use_double_quant": true, |
| "llm_int8_enable_fp32_cpu_offload": false, |
| "llm_int8_has_fp16_weight": false, |
| "llm_int8_skip_modules": null, |
| "llm_int8_threshold": 6.0, |
| "load_in_4bit": true, |
| "load_in_8bit": false, |
| "quant_method": "bitsandbytes" |
| }, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "unsloth_version": "2024.9", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [INFO|2025-02-04 06:46:06] <string>:484 >> |
|
|
| Training completed. Do not forget to share your model on huggingface.co/models =) |
|
|
|
|
|
|
| [INFO|2025-02-04 06:46:06] trainer.py:3801 >> Saving model checkpoint to saves/Mistral-7B-Instruct-v0.3/lora/mistral_7b_trained |
|
|
| [INFO|2025-02-04 06:46:06] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json |
|
|
| [INFO|2025-02-04 06:46:06] configuration_utils.py:746 >> Model config MistralConfig { |
| "_name_or_path": "unsloth/Mistral-7B-Instruct-v0.3", |
| "architectures": [ |
| "MistralForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 32768, |
| "model_type": "mistral", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "pad_token_id": 770, |
| "quantization_config": { |
| "_load_in_4bit": true, |
| "_load_in_8bit": false, |
| "bnb_4bit_compute_dtype": "bfloat16", |
| "bnb_4bit_quant_storage": "uint8", |
| "bnb_4bit_quant_type": "nf4", |
| "bnb_4bit_use_double_quant": true, |
| "llm_int8_enable_fp32_cpu_offload": false, |
| "llm_int8_has_fp16_weight": false, |
| "llm_int8_skip_modules": null, |
| "llm_int8_threshold": 6.0, |
| "load_in_4bit": true, |
| "load_in_8bit": false, |
| "quant_method": "bitsandbytes" |
| }, |
| "rms_norm_eps": 1e-05, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.46.1", |
| "unsloth_version": "2024.9", |
| "use_cache": true, |
| "vocab_size": 32768 |
| } |
|
|
|
|
| [WARNING|2025-02-04 06:46:07] logging.py:162 >> No metric eval_loss to plot. |
|
|
| [WARNING|2025-02-04 06:46:07] logging.py:162 >> No metric eval_accuracy to plot. |
|
|
| [INFO|2025-02-04 06:46:07] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields: |
| {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} |
|
|
|
|