Mistral-EN-Part-1 / running_log.txt
[INFO|2025-04-07 18:40:30] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/config.json
[INFO|2025-04-07 18:40:30] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "use_cache": true,
  "vocab_size": 32768
}
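The attention shapes in this config are internally consistent; a quick sanity check of the grouped-query attention geometry, using only arithmetic on the values printed above:

```python
# Grouped-query attention geometry from the config dump above:
# 32 query heads share 8 key/value heads, each of width head_dim.
config = {
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "num_key_value_heads": 8,
    "head_dim": 128,
}

head_dim = config["hidden_size"] // config["num_attention_heads"]
gqa_group_size = config["num_attention_heads"] // config["num_key_value_heads"]

assert head_dim == config["head_dim"]  # 4096 / 32 = 128
print(head_dim, gqa_group_size)        # 128 4
```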
[INFO|2025-04-07 18:40:31] tokenization_utils_base.py:2050 >> loading file tokenizer.model from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.model
[INFO|2025-04-07 18:40:31] tokenization_utils_base.py:2050 >> loading file tokenizer.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.json
[INFO|2025-04-07 18:40:31] tokenization_utils_base.py:2050 >> loading file added_tokens.json from cache at None
[INFO|2025-04-07 18:40:31] tokenization_utils_base.py:2050 >> loading file special_tokens_map.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/special_tokens_map.json
[INFO|2025-04-07 18:40:31] tokenization_utils_base.py:2050 >> loading file tokenizer_config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer_config.json
[INFO|2025-04-07 18:40:31] tokenization_utils_base.py:2050 >> loading file chat_template.jinja from cache at None
[INFO|2025-04-07 18:40:32] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/config.json
[INFO|2025-04-07 18:40:32] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "use_cache": true,
  "vocab_size": 32768
}
[INFO|2025-04-07 18:40:32] tokenization_utils_base.py:2050 >> loading file tokenizer.model from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.model
[INFO|2025-04-07 18:40:32] tokenization_utils_base.py:2050 >> loading file tokenizer.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer.json
[INFO|2025-04-07 18:40:32] tokenization_utils_base.py:2050 >> loading file added_tokens.json from cache at None
[INFO|2025-04-07 18:40:32] tokenization_utils_base.py:2050 >> loading file special_tokens_map.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/special_tokens_map.json
[INFO|2025-04-07 18:40:32] tokenization_utils_base.py:2050 >> loading file tokenizer_config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/tokenizer_config.json
[INFO|2025-04-07 18:40:32] tokenization_utils_base.py:2050 >> loading file chat_template.jinja from cache at None
[INFO|2025-04-07 18:40:32] logging.py:157 >> Add pad token: </s>
[INFO|2025-04-07 18:40:32] logging.py:157 >> Loading dataset en_train_part_1.json...
[INFO|2025-04-07 18:40:34] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.3/snapshots/e0bc86c23ce5aae1db576c8cca6f06f1f73af2db/config.json
[INFO|2025-04-07 18:40:34] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "use_cache": true,
  "vocab_size": 32768
}
[INFO|2025-04-07 18:40:34] logging.py:157 >> Quantizing model to 4 bit with bitsandbytes.
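In current transformers/bitsandbytes terms, the 4-bit step above corresponds to a configuration like the sketch below, which mirrors the quantization_config dumps that appear later in the log; it is not the framework's literal call.

```python
import torch
from transformers import BitsAndBytesConfig

# NF4 double quantization with bf16 compute, mirroring the logged
# quantization_config; pass as quantization_config= to from_pretrained().
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # NormalFloat4 weight format
    bnb_4bit_compute_dtype=torch.bfloat16,  # matmuls run in bfloat16
    bnb_4bit_use_double_quant=True,         # also quantize the quant constants
)
```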
[INFO|2025-04-07 18:40:37] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json
[INFO|2025-04-07 18:40:37] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 770,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "unsloth_version": "2024.9",
  "use_cache": true,
  "vocab_size": 32768
}
[INFO|2025-04-07 18:40:38] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--aws/snapshots/66e4c14a24a0b445779c922eef992a4af0694a88/config.json
[INFO|2025-04-07 18:40:38] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--repeat/snapshots/7c48478c02f84ed89f149b0815cc0216ee831fb0/config.json
[INFO|2025-04-07 18:40:39] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--vram-48/snapshots/3aea312d98ea327daeb5dbf7374b1d7cf8c65bc0/config.json
[INFO|2025-04-07 18:40:39] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unslothai--1/snapshots/7ec782b7604cd9ea0781c23a4270f031650f5617/config.json
[INFO|2025-04-07 18:40:39] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json
[INFO|2025-04-07 18:40:39] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 770,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "unsloth_version": "2024.9",
  "use_cache": true,
  "vocab_size": 32768
}
[INFO|2025-04-07 18:40:39] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json
[INFO|2025-04-07 18:40:39] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 770,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "unsloth_version": "2024.9",
  "use_cache": true,
  "vocab_size": 32768
}
[INFO|2025-04-07 18:40:51] modeling_utils.py:3982 >> loading weights file model.safetensors from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/model.safetensors
[INFO|2025-04-07 18:40:51] modeling_utils.py:1633 >> Instantiating MistralForCausalLM model under default dtype torch.bfloat16.
[INFO|2025-04-07 18:40:51] configuration_utils.py:1140 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 770
}
[INFO|2025-04-07 18:40:52] modeling_utils.py:4970 >> All model checkpoint weights were used when initializing MistralForCausalLM.
[INFO|2025-04-07 18:40:52] modeling_utils.py:4978 >> All the weights of MistralForCausalLM were initialized from the model checkpoint at unsloth/mistral-7b-instruct-v0.3-bnb-4bit.
If your task is similar to the task the model of the checkpoint was trained on, you can already use MistralForCausalLM for predictions without further training.
[INFO|2025-04-07 18:40:53] configuration_utils.py:1095 >> loading configuration file generation_config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/generation_config.json
[INFO|2025-04-07 18:40:53] configuration_utils.py:1140 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "max_length": 32768,
  "pad_token_id": 770
}
[INFO|2025-04-07 18:40:57] logging.py:157 >> Gradient checkpointing enabled.
[INFO|2025-04-07 18:40:57] logging.py:157 >> Upcasting trainable params to float32.
[INFO|2025-04-07 18:40:57] logging.py:157 >> Fine-tuning method: LoRA
[INFO|2025-04-07 18:40:57] logging.py:157 >> Found linear modules: v_proj,down_proj,k_proj,o_proj,up_proj,q_proj,gate_proj
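The seven module names in the line above are the usual LoRA targets for Mistral; a hypothetical PEFT config covering them is sketched below. Rank, alpha, and dropout are assumptions, since the log never prints them, though r = 8 is consistent with the trainable-parameter count reported later.

```python
from peft import LoraConfig

# Hypothetical adapter config for the linear modules the log found;
# r/lora_alpha/lora_dropout are illustrative assumptions, not logged values.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.0,
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
```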
[WARNING|2025-04-07 18:40:59] logging.py:329 >> Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
[INFO|2025-04-07 18:41:00] logging.py:157 >> trainable params: 20,971,520 || all params: 7,268,995,072 || trainable%: 0.2885
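The 20,971,520 trainable parameters are exactly what rank-8 LoRA adapters on all seven projections would add (the rank is inferred from the total, not logged): an adapter on a (d_out, d_in) linear layer contributes r * (d_in + d_out) parameters.

```python
# Parameter-count check using the shapes implied by the model config:
# hidden 4096, intermediate 14336, KV width = 8 heads * head_dim 128 = 1024.
hidden, inter, kv = 4096, 14336, 8 * 128
shapes = [
    (hidden, hidden),  # q_proj
    (kv, hidden),      # k_proj
    (kv, hidden),      # v_proj
    (hidden, hidden),  # o_proj
    (inter, hidden),   # gate_proj
    (inter, hidden),   # up_proj
    (hidden, inter),   # down_proj
]
r, layers = 8, 32  # r = 8 is inferred, not printed in the log
trainable = layers * sum(r * (d_in + d_out) for d_out, d_in in shapes)

assert trainable == 20_971_520
print(f"{100 * trainable / 7_268_995_072:.4f}%")  # 0.2885%, as logged
```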
[INFO|2025-04-07 18:41:00] trainer.py:746 >> Using auto half precision backend
[WARNING|2025-04-07 18:41:01] <string>:168 >> ==((====))== Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 929 | Num Epochs = 3 | Total steps = 348
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 20,971,520/7,000,000,000 (0.30% trained)
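The banner's step count follows from its own numbers, assuming the trailing partial gradient-accumulation step of each epoch is dropped:

```python
import math

num_examples, per_device_bs, grad_accum, epochs = 929, 4, 2, 3

micro_batches = math.ceil(num_examples / per_device_bs)  # 233 dataloader batches
updates_per_epoch = micro_batches // grad_accum          # 116 optimizer steps
total_steps = updates_per_epoch * epochs

assert total_steps == 348  # matches "Total steps = 348"
```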
[INFO|2025-04-07 18:41:23] logging.py:157 >> {'loss': 0.8269, 'learning_rate': 2.9939e-05, 'epoch': 0.09, 'throughput': 1205.00}
[INFO|2025-04-07 18:41:32] logging.py:157 >> {'loss': 0.1558, 'learning_rate': 2.9756e-05, 'epoch': 0.17, 'throughput': 1741.54}
[INFO|2025-04-07 18:41:41] logging.py:157 >> {'loss': 0.1655, 'learning_rate': 2.9453e-05, 'epoch': 0.26, 'throughput': 2021.00}
[INFO|2025-04-07 18:41:50] logging.py:157 >> {'loss': 0.1686, 'learning_rate': 2.9033e-05, 'epoch': 0.34, 'throughput': 2196.79}
[INFO|2025-04-07 18:41:59] logging.py:157 >> {'loss': 0.1280, 'learning_rate': 2.8498e-05, 'epoch': 0.43, 'throughput': 2312.55}
[INFO|2025-04-07 18:42:08] logging.py:157 >> {'loss': 0.1581, 'learning_rate': 2.7853e-05, 'epoch': 0.52, 'throughput': 2412.29}
[INFO|2025-04-07 18:42:17] logging.py:157 >> {'loss': 0.1106, 'learning_rate': 2.7103e-05, 'epoch': 0.60, 'throughput': 2474.02}
[INFO|2025-04-07 18:42:26] logging.py:157 >> {'loss': 0.1401, 'learning_rate': 2.6255e-05, 'epoch': 0.69, 'throughput': 2522.25}
[INFO|2025-04-07 18:42:35] logging.py:157 >> {'loss': 0.1282, 'learning_rate': 2.5315e-05, 'epoch': 0.77, 'throughput': 2559.46}
[INFO|2025-04-07 18:42:45] logging.py:157 >> {'loss': 0.0936, 'learning_rate': 2.4292e-05, 'epoch': 0.86, 'throughput': 2591.37}
[INFO|2025-04-07 18:42:54] logging.py:157 >> {'loss': 0.1569, 'learning_rate': 2.3192e-05, 'epoch': 0.94, 'throughput': 2617.95}
[INFO|2025-04-07 18:43:03] logging.py:157 >> {'loss': 0.0904, 'learning_rate': 2.2026e-05, 'epoch': 1.03, 'throughput': 2622.68}
[INFO|2025-04-07 18:43:12] logging.py:157 >> {'loss': 0.0730, 'learning_rate': 2.0803e-05, 'epoch': 1.11, 'throughput': 2640.79}
[INFO|2025-04-07 18:43:21] logging.py:157 >> {'loss': 0.0580, 'learning_rate': 1.9532e-05, 'epoch': 1.20, 'throughput': 2656.87}
[INFO|2025-04-07 18:43:30] logging.py:157 >> {'loss': 0.0818, 'learning_rate': 1.8225e-05, 'epoch': 1.28, 'throughput': 2673.66}
[INFO|2025-04-07 18:43:39] logging.py:157 >> {'loss': 0.0715, 'learning_rate': 1.6891e-05, 'epoch': 1.37, 'throughput': 2687.40}
[INFO|2025-04-07 18:43:48] logging.py:157 >> {'loss': 0.0691, 'learning_rate': 1.5542e-05, 'epoch': 1.45, 'throughput': 2700.42}
[INFO|2025-04-07 18:43:58] logging.py:157 >> {'loss': 0.0713, 'learning_rate': 1.4188e-05, 'epoch': 1.54, 'throughput': 2713.99}
[INFO|2025-04-07 18:44:07] logging.py:157 >> {'loss': 0.0985, 'learning_rate': 1.2841e-05, 'epoch': 1.63, 'throughput': 2726.48}
[INFO|2025-04-07 18:44:16] logging.py:157 >> {'loss': 0.0728, 'learning_rate': 1.1511e-05, 'epoch': 1.71, 'throughput': 2736.15}
[INFO|2025-04-07 18:44:25] logging.py:157 >> {'loss': 0.1002, 'learning_rate': 1.0210e-05, 'epoch': 1.80, 'throughput': 2748.97}
[INFO|2025-04-07 18:44:34] logging.py:157 >> {'loss': 0.0976, 'learning_rate': 8.9485e-06, 'epoch': 1.88, 'throughput': 2756.85}
[INFO|2025-04-07 18:44:43] logging.py:157 >> {'loss': 0.0801, 'learning_rate': 7.7358e-06, 'epoch': 1.97, 'throughput': 2764.62}
[INFO|2025-04-07 18:44:52] logging.py:157 >> {'loss': 0.0533, 'learning_rate': 6.5822e-06, 'epoch': 2.05, 'throughput': 2769.81}
[INFO|2025-04-07 18:45:00] logging.py:157 >> {'loss': 0.0590, 'learning_rate': 5.4972e-06, 'epoch': 2.14, 'throughput': 2775.77}
[INFO|2025-04-07 18:45:09] logging.py:157 >> {'loss': 0.0505, 'learning_rate': 4.4896e-06, 'epoch': 2.22, 'throughput': 2781.09}
[INFO|2025-04-07 18:45:18] logging.py:157 >> {'loss': 0.0404, 'learning_rate': 3.5676e-06, 'epoch': 2.31, 'throughput': 2786.55}
[INFO|2025-04-07 18:45:27] logging.py:157 >> {'loss': 0.0590, 'learning_rate': 2.7387e-06, 'epoch': 2.39, 'throughput': 2792.12}
[INFO|2025-04-07 18:45:36] logging.py:157 >> {'loss': 0.0429, 'learning_rate': 2.0096e-06, 'epoch': 2.48, 'throughput': 2797.48}
[INFO|2025-04-07 18:45:45] logging.py:157 >> {'loss': 0.0341, 'learning_rate': 1.3864e-06, 'epoch': 2.57, 'throughput': 2802.16}
[INFO|2025-04-07 18:45:54] logging.py:157 >> {'loss': 0.0349, 'learning_rate': 8.7399e-07, 'epoch': 2.65, 'throughput': 2806.55}
[INFO|2025-04-07 18:46:04] logging.py:157 >> {'loss': 0.0375, 'learning_rate': 4.7666e-07, 'epoch': 2.74, 'throughput': 2812.12}
[INFO|2025-04-07 18:46:13] logging.py:157 >> {'loss': 0.0184, 'learning_rate': 1.9760e-07, 'epoch': 2.82, 'throughput': 2816.04}
[INFO|2025-04-07 18:46:22] logging.py:157 >> {'loss': 0.0583, 'learning_rate': 3.9102e-08, 'epoch': 2.91, 'throughput': 2819.51}
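The learning-rate column above is consistent with plain cosine decay from a 3e-5 peak over the 348 total steps, logged every 10 steps with no warmup; the closed form below reproduces the logged values to within about 0.1%.

```python
import math

peak_lr, total_steps = 3e-5, 348

def cosine_lr(step: int) -> float:
    # Cosine annealing from peak_lr at step 0 toward 0 at total_steps.
    return 0.5 * peak_lr * (1 + math.cos(math.pi * step / total_steps))

# Logged values: step 10 -> 2.9939e-05, step 330 -> 1.9760e-07
assert math.isclose(cosine_lr(10), 2.9939e-05, rel_tol=1e-3)
assert math.isclose(cosine_lr(330), 1.9760e-07, rel_tol=1e-3)
```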
[INFO|2025-04-07 18:46:29] trainer.py:3942 >> Saving model checkpoint to saves/Mistral-7B-Instruct-v0.3/lora/part_2/checkpoint-348
[INFO|2025-04-07 18:46:30] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json
[INFO|2025-04-07 18:46:30] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "unsloth/Mistral-7B-Instruct-v0.3",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 770,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "unsloth_version": "2024.9",
  "use_cache": true,
  "vocab_size": 32768
}
[INFO|2025-04-07 18:46:31] <string>:415 >>
Training completed. Do not forget to share your model on huggingface.co/models =)
[INFO|2025-04-07 18:46:31] trainer.py:3942 >> Saving model checkpoint to saves/Mistral-7B-Instruct-v0.3/lora/part_2
[INFO|2025-04-07 18:46:31] configuration_utils.py:699 >> loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--unsloth--mistral-7b-instruct-v0.3-bnb-4bit/snapshots/d5f623888f1415cf89b5c208d09cb620694618ee/config.json
[INFO|2025-04-07 18:46:31] configuration_utils.py:771 >> Model config MistralConfig {
  "_name_or_path": "unsloth/Mistral-7B-Instruct-v0.3",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 770,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.49.0",
  "unsloth_version": "2024.9",
  "use_cache": true,
  "vocab_size": 32768
}
[WARNING|2025-04-07 18:46:31] logging.py:162 >> No metric eval_loss to plot.
[WARNING|2025-04-07 18:46:31] logging.py:162 >> No metric eval_accuracy to plot.
[INFO|2025-04-07 18:46:31] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}