| 06/03/2024 09:30:43 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 8, distributed training: True, 16-bits training: False |
| 06/03/2024 09:30:43 - INFO - __main__ - Training parameters TrainingArguments( |
| _n_gpu=8, |
| adafactor=False, |
| adam_beta1=0.9, |
| adam_beta2=0.999, |
| adam_epsilon=1e-08, |
| analysis_dataset=legalbench_train_annotated, |
| analysis_mode=1.0, |
| auto_find_batch_size=False, |
| bf16=True, |
| bf16_full_eval=False, |
| data_seed=None, |
| dataloader_drop_last=False, |
| dataloader_num_workers=0, |
| dataloader_persistent_workers=False, |
| dataloader_pin_memory=True, |
| ddp_backend=None, |
| ddp_broadcast_buffers=None, |
| ddp_bucket_cap_mb=None, |
| ddp_find_unused_parameters=None, |
| ddp_timeout=1800, |
| debug=[], |
| deepspeed=None, |
| disable_tqdm=False, |
| dispatch_batches=None, |
| do_eval=True, |
| do_predict=False, |
| do_train=True, |
| eval_accumulation_steps=None, |
| eval_delay=0, |
| eval_steps=2000, |
| evaluation_strategy=steps, |
| fp16=False, |
| fp16_backend=auto, |
| fp16_full_eval=False, |
| fp16_opt_level=O1, |
| fsdp=[], |
| fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, |
| fsdp_min_num_params=0, |
| fsdp_transformer_layer_cls_to_wrap=None, |
| full_determinism=False, |
| gradient_accumulation_steps=32, |
| gradient_checkpointing=False, |
| gradient_checkpointing_kwargs=None, |
| granularity=sequence, |
| greater_is_better=False, |
| group_by_length=True, |
| half_precision_backend=auto, |
| hub_always_push=False, |
| hub_model_id=None, |
| hub_private_repo=False, |
| hub_strategy=every_save, |
| hub_token=<HUB_TOKEN>, |
| ignore_data_skip=False, |
| include_inputs_for_metrics=False, |
| include_num_input_tokens_seen=False, |
| include_tokens_per_second=False, |
| inner_lr=1e-05, |
| jit_mode_eval=False, |
| label_names=None, |
| label_smoothing_factor=0.0, |
| learning_rate=2e-05, |
| length_column_name=length, |
| load_best_model_at_end=True, |
| local_rank=0, |
| log_level=passive, |
| log_level_replica=warning, |
| log_on_each_node=True, |
| logging_dir=final_out/llama3-8b-instruct-final-less-lora-everything/runs/Jun03_09-30-41_sagemaker-data-sci-ml-p4d-24xlarge-7e6e825bf88acadc865d379de4ed, |
| logging_first_step=False, |
| logging_nan_inf_filter=True, |
| logging_steps=1.0, |
| logging_strategy=steps, |
| lr_scheduler_kwargs={}, |
| lr_scheduler_type=linear, |
| max_grad_norm=1.0, |
| max_steps=-1, |
| meta_epilson=1.0, |
| metric_for_best_model=eval_loss, |
| mp_parameters=, |
| neftune_noise_alpha=None, |
| no_cuda=False, |
| num_train_epochs=5.0, |
| optim=adamw_torch, |
| optim_args=None, |
| output_dir=final_out/llama3-8b-instruct-final-less-lora-everything, |
| overwrite_output_dir=False, |
| past_index=-1, |
| per_device_eval_batch_size=4, |
| per_device_train_batch_size=4, |
| prediction_loss_only=False, |
| push_to_hub=False, |
| push_to_hub_model_id=None, |
| push_to_hub_organization=None, |
| push_to_hub_token=<PUSH_TO_HUB_TOKEN>, |
| ray_scope=last, |
| remove_unused_columns=True, |
| report_to=['wandb'], |
| resume_from_checkpoint=None, |
| run_name=final_out/llama3-8b-instruct-final-less-lora-everything, |
| save_on_each_node=False, |
| save_only_model=False, |
| save_safetensors=True, |
| save_steps=2000, |
| save_strategy=steps, |
| save_total_limit=None, |
| seed=0, |
| select_frac=0.5, |
| skip_memory_metrics=True, |
| split_batches=False, |
| target_dataset=mmlu, |
| tf32=False, |
| torch_compile=False, |
| torch_compile_backend=None, |
| torch_compile_mode=None, |
| torchdynamo=None, |
| tpu_metrics_debug=False, |
| tpu_num_cores=None, |
| train_dataset_names=None, |
| use_cpu=False, |
| use_ipex=False, |
| use_legacy_prediction_loop=False, |
| use_mps_device=False, |
| warmup_ratio=0.03, |
| warmup_steps=0, |
| weight_decay=0.0, |
| weight_learning_rate=1e-05, |
| ) |
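Editor's note: with _n_gpu=8, per_device_train_batch_size=4, and gradient_accumulation_steps=32, the effective global batch size is 4 × 32 × 8 = 1024 sequences per optimizer step. Below is a minimal sketch of the stock portion of these arguments; the fields analysis_dataset, analysis_mode, granularity, inner_lr, meta_epilson, select_frac, target_dataset, train_dataset_names, and weight_learning_rate are this project's own extensions to TrainingArguments and are omitted.

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="final_out/llama3-8b-instruct-final-less-lora-everything",
    do_train=True,
    do_eval=True,
    evaluation_strategy="steps",
    eval_steps=2000,
    save_strategy="steps",
    save_steps=2000,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=32,
    num_train_epochs=5.0,
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    warmup_ratio=0.03,
    bf16=True,
    group_by_length=True,
    logging_steps=1,
    report_to=["wandb"],
    seed=0,
)
# Effective global batch: 4 per device * 32 accumulation steps * 8 GPUs = 1024.
```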
| 06/03/2024 09:30:43 - INFO - __main__ - Model parameters ModelArguments(model_name_or_path='meta-llama/Meta-Llama-3-8B-Instruct', reference_model_path=None, model_meta_name_or_path=None, config_name=None, tokenizer_name=None, cache_dir=None, use_fast_tokenizer=True, model_revision='main', use_auth_token=False, torch_dtype=None, token_wise=False, lora=True, lora_r=128, lora_alpha=512.0, lora_dropout=0.1, lora_target_modules=None) |
| 06/03/2024 09:30:43 - INFO - __main__ - Dataset parameters DataArguments(train_files=['legalbench_train_annotated'], overwrite_cache=False, preprocessing_num_workers=None, max_seq_length=8192, sample_data_seed=42, percentage=1.0, data_dir='data') |
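Editor's note: a hedged sketch of the adapter setup implied by the ModelArguments line (lora=True, lora_r=128, lora_alpha=512.0, lora_dropout=0.1, lora_target_modules=None). With target_modules left unset, peft falls back to its per-architecture default, which for Llama models is ["q_proj", "v_proj"], consistent with the module tree printed later in this log.

```python
from peft import LoraConfig, TaskType, get_peft_model

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=128,             # lora_r
    lora_alpha=512,    # lora_alpha
    lora_dropout=0.1,  # lora_dropout
)
model = get_peft_model(model, lora_config)  # `model` assumed loaded beforehand
model.print_trainable_parameters()          # emits the "trainable params" line seen below
```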
| /opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. |
| warnings.warn( |
| [INFO|tokenization_utils_base.py:2026] 2024-06-03 09:30:43,346 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa/tokenizer.json |
| [INFO|tokenization_utils_base.py:2026] 2024-06-03 09:30:43,346 >> loading file added_tokens.json from cache at None |
| [INFO|tokenization_utils_base.py:2026] 2024-06-03 09:30:43,346 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa/special_tokens_map.json |
| [INFO|tokenization_utils_base.py:2026] 2024-06-03 09:30:43,346 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa/tokenizer_config.json |
| [WARNING|logging.py:314] 2024-06-03 09:30:43,664 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
| Overwrite dataset info from restored data version if exists. |
| Loading Legalbench data (legalbench_train_annotated) |
| 06/03/2024 09:30:45 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists. |
| Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| 06/03/2024 09:30:45 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| Found cached dataset legalbench_train_annotated_instruct_dataset (/root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a) |
| 06/03/2024 09:30:45 - INFO - datasets.builder - Found cached dataset legalbench_train_annotated_instruct_dataset (/root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a) |
| Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| 06/03/2024 09:30:45 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| 100%|██████████| 1656/1656 [00:00<00:00, 6714.36it/s] |
| ================================================================================ |
| Size of combined datasets: 1656 |
| ================================================================================ |
| Spawning 10 processes |
| 06/03/2024 09:30:46 - INFO - datasets.arrow_dataset - Spawning 10 processes |
| Tokenizing and reformatting instruction data (num_proc=10): 100%|██████████| 1656/1656 [00:02<00:00, 705.79 examples/s] |
| Concatenating 10 shards |
| 06/03/2024 09:30:48 - INFO - datasets.arrow_dataset - Concatenating 10 shards |
| /opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. |
| warnings.warn( |
| [INFO|configuration_utils.py:739] 2024-06-03 09:30:48,888 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa/config.json |
| [INFO|configuration_utils.py:802] 2024-06-03 09:30:48,889 >> Model config LlamaConfig { |
| "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", |
| "architectures": [ |
| "LlamaForCausalLM" |
| ], |
| "attention_bias": false, |
| "attention_dropout": 0.0, |
| "bos_token_id": 128000, |
| "eos_token_id": 128009, |
| "hidden_act": "silu", |
| "hidden_size": 4096, |
| "initializer_range": 0.02, |
| "intermediate_size": 14336, |
| "max_position_embeddings": 8192, |
| "model_type": "llama", |
| "num_attention_heads": 32, |
| "num_hidden_layers": 32, |
| "num_key_value_heads": 8, |
| "pretraining_tp": 1, |
| "rms_norm_eps": 1e-05, |
| "rope_scaling": null, |
| "rope_theta": 500000.0, |
| "tie_word_embeddings": false, |
| "transformers_version": "4.36.2", |
| "use_cache": true, |
| "vocab_size": 128256 |
| } |
|
|
| [INFO|modeling_utils.py:3344] 2024-06-03 09:30:48,932 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa/model.safetensors.index.json |
| [INFO|configuration_utils.py:826] 2024-06-03 09:30:48,949 >> Generate config GenerationConfig { |
| "bos_token_id": 128000, |
| "eos_token_id": 128009 |
| } |
|
|
| Removed 0 oversized examples |
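Editor's note: the script's actual length filter is not shown in this log; a hypothetical reconstruction of what "Removed 0 oversized examples" could correspond to, assuming a tokenized datasets.Dataset and the max_seq_length=8192 from DataArguments.

```python
from datasets import Dataset

max_seq_length = 8192  # from DataArguments above
dataset = Dataset.from_dict({"input_ids": [[1, 2, 3], list(range(9000))]})  # toy stand-in
before = len(dataset)
dataset = dataset.filter(lambda ex: len(ex["input_ids"]) <= max_seq_length)
print(f"Removed {before - len(dataset)} oversized examples")  # -> Removed 1 ... here
```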
| Loading checkpoint shards: 100%|██████████| 4/4 [00:28<00:00, 7.08s/it] |
| [INFO|modeling_utils.py:4185] 2024-06-03 09:31:18,972 >> All model checkpoint weights were used when initializing LlamaForCausalLM. |
|
|
| [INFO|modeling_utils.py:4193] 2024-06-03 09:31:18,972 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B-Instruct. |
| If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. |
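Editor's note: a hedged sketch of the model load implied by the log. bf16=True appears in the training arguments; device_map="auto" is an assumption, but it is consistent with the is_model_parallel notice logged further below.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    torch_dtype=torch.bfloat16,  # matches bf16=True in the arguments
    device_map="auto",           # assumption; consistent with the
                                 # is_model_parallel notice below
)
```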
| [INFO|configuration_utils.py:781] 2024-06-03 09:31:19,343 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa/generation_config.json |
| [INFO|configuration_utils.py:826] 2024-06-03 09:31:19,343 >> Generate config GenerationConfig { |
| "bos_token_id": 128000, |
| "do_sample": true, |
| "eos_token_id": [ |
| 128001, |
| 128009 |
| ], |
| "max_length": 4096, |
| "temperature": 0.6, |
| "top_p": 0.9 |
| } |
|
|
| 06/03/2024 09:31:20 - INFO - __main__ - Applied LoRA to model. |
| trainable params: 54,525,952 || all params: 8,084,852,736 || trainable%: 0.674421090655225 |
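Editor's note: both numbers on this line can be verified from the shapes printed in this log (hidden size 4096, KV projection width 1024, 32 decoder layers, r=128, and 128264 embedding rows after the 8 added special tokens).

```python
# h=4096, kv=1024 (8 KV heads * head_dim 128), ff=14336, r=128, 32 layers,
# V=128264 embedding rows (vocab_size 128256 + 8 added special tokens)
h, kv, ff, r, L, V = 4096, 1024, 14336, 128, 32, 128264
lora = L * (h * r + r * h + h * r + r * kv)          # q_proj + v_proj adapters
layer = 2 * h * h + 2 * h * kv + 3 * h * ff + 2 * h  # attn + MLP + 2 RMSNorms
base = 2 * V * h + L * layer + h                     # embeddings + lm_head + final norm
print(lora)         # 54525952   -> "trainable params"
print(base + lora)  # 8084852736 -> "all params"
```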
| /root/meta_weight_llm/less/train/data_arguments.py:53: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). |
|   com_len = (torch.tensor(labels) > -1).sum() |
| Map: 100%|██████████| 1656/1656 [00:00<00:00, 8620.75 examples/s] |
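Editor's note: the UserWarning above fires because `labels` is already a tensor when it reaches torch.tensor(). A possible fix for data_arguments.py line 53, with a stand-in tensor for self-containment:

```python
import torch

labels = torch.full((53,), -100, dtype=torch.long)  # stand-in for the real labels
labels[-2:] = torch.tensor([2201, 128001])
# `labels` is already a tensor here, which is what triggers the warning;
# a pure comparison needs no copy at all:
com_len = (labels > -1).sum()  # instead of (torch.tensor(labels) > -1).sum()
```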
| [train set] examples: 1656; # avg tokens: 262.96136474609375 |
| [train set] examples: 1656; # avg completion tokens: 2.4480676651000977 |
| 06/03/2024 09:31:20 - INFO - __main__ - Sample 788 of the training set: {'input_ids': tensor([128000, 128006, 882, 128007, 198, 39, 7596, 352, 374, |
| 459, 704, 8838, 98462, 5224, 11784, 311, 12391, 279, |
| 8206, 315, 279, 5030, 50697, 382, 48, 25, 2057, |
| 12391, 53215, 574, 264, 1695, 4394, 11, 264, 5224, |
| 1903, 304, 5590, 430, 568, 10456, 311, 2571, 14177, |
| 9875, 13, 128009, 128006, 78191, 128007, 2201, 128001]), 'labels': tensor([ -100, -100, -100, -100, -100, -100, -100, -100, -100, |
| -100, -100, -100, -100, -100, -100, -100, -100, -100, |
| -100, -100, -100, -100, -100, -100, -100, -100, -100, |
| -100, -100, -100, -100, -100, -100, -100, -100, -100, |
| -100, -100, -100, -100, -100, -100, -100, -100, -100, |
| -100, -100, -100, -100, -100, -100, 2201, 128001]), 'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1]), 'n_unmaked_labels': tensor(2)}. |
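Editor's note: -100 is the default ignore_index of PyTorch's cross-entropy loss, so the masked prompt positions in the sample above contribute nothing to the loss; only the 2 unmasked completion tokens do (hence 'n_unmaked_labels': tensor(2) and the ~2.45 average completion tokens). A toy illustration, not the script's exact (shifted) loss computation:

```python
import torch
import torch.nn.functional as F

logits = torch.randn(53, 128256)                    # 53 positions, Llama-3 vocab
labels = torch.full((53,), -100, dtype=torch.long)  # prompt positions masked out
labels[-2:] = torch.tensor([2201, 128001])          # completion tokens kept
loss = F.cross_entropy(logits, labels, ignore_index=-100)  # only 2 positions count
```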
| 06/03/2024 09:31:20 - INFO - __main__ - trainable model_params: 54525952 |
| Overwrite dataset info from restored data version if exists. |
| 06/03/2024 09:31:22 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists. |
| Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| 06/03/2024 09:31:22 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| Found cached dataset legalbench_train_annotated_instruct_dataset (/root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a) |
| 06/03/2024 09:31:22 - INFO - datasets.builder - Found cached dataset legalbench_train_annotated_instruct_dataset (/root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a) |
| Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| 06/03/2024 09:31:22 - INFO - datasets.info - Loading Dataset info from /root/.cache/huggingface/datasets/adminsafesign___legalbench_train_annotated_instruct_dataset/default/0.0.0/5da14aca2e7c80fd6c61cb95e2e40f2d723b957a |
| [WARNING|tokenization_utils_base.py:2605] 2024-06-03 09:31:22,541 >> Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`. |
| [INFO|trainer.py:396] 2024-06-03 09:31:24,080 >> You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching. |
| [INFO|trainer.py:519] 2024-06-03 09:31:24,084 >> max_steps is given, it will override any value given in num_train_epochs |
| [INFO|trainer.py:568] 2024-06-03 09:31:24,084 >> Using auto half precision backend |
| ******** Example starts ******** |
| <|start_header_id|>user<|end_header_id|> |
| A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word. |
|
|
| Q: The mark 'Ivory' for a product made of elephant tusks. What is the type of mark?<|eot_id|><|start_header_id|>assistant<|end_header_id|>generic |
| ******** Example ends ******** |
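Editor's note: the block between the Example markers matches Llama-3's chat template with the gold label appended as the assistant completion; a hedged reconstruction, with the prompt abbreviated and `tokenizer` assumed in scope:

```python
messages = [
    {"role": "user", "content": "A mark is generic if ... What is the type of mark?"}
]
text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
text += "generic"  # gold label appended as the assistant completion
```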
| PeftModelForCausalLM( |
| (base_model): LoraModel( |
| (model): LlamaForCausalLM( |
| (model): LlamaModel( |
| (embed_tokens): Embedding(128264, 4096) |
| (layers): ModuleList( |
| (0-31): 32 x LlamaDecoderLayer( |
| (self_attn): LlamaSdpaAttention( |
| (q_proj): lora.Linear( |
| (base_layer): Linear(in_features=4096, out_features=4096, bias=False) |
| (lora_dropout): ModuleDict( |
| (default): Dropout(p=0.1, inplace=False) |
| ) |
| (lora_A): ModuleDict( |
| (default): Linear(in_features=4096, out_features=128, bias=False) |
| ) |
| (lora_B): ModuleDict( |
| (default): Linear(in_features=128, out_features=4096, bias=False) |
| ) |
| (lora_embedding_A): ParameterDict() |
| (lora_embedding_B): ParameterDict() |
| ) |
| (k_proj): Linear(in_features=4096, out_features=1024, bias=False) |
| (v_proj): lora.Linear( |
| (base_layer): Linear(in_features=4096, out_features=1024, bias=False) |
| (lora_dropout): ModuleDict( |
| (default): Dropout(p=0.1, inplace=False) |
| ) |
| (lora_A): ModuleDict( |
| (default): Linear(in_features=4096, out_features=128, bias=False) |
| ) |
| (lora_B): ModuleDict( |
| (default): Linear(in_features=128, out_features=1024, bias=False) |
| ) |
| (lora_embedding_A): ParameterDict() |
| (lora_embedding_B): ParameterDict() |
| ) |
| (o_proj): Linear(in_features=4096, out_features=4096, bias=False) |
| (rotary_emb): LlamaRotaryEmbedding() |
| ) |
| (mlp): LlamaMLP( |
| (gate_proj): Linear(in_features=4096, out_features=14336, bias=False) |
| (up_proj): Linear(in_features=4096, out_features=14336, bias=False) |
| (down_proj): Linear(in_features=14336, out_features=4096, bias=False) |
| (act_fn): SiLU() |
| ) |
| (input_layernorm): LlamaRMSNorm() |
| (post_attention_layernorm): LlamaRMSNorm() |
| ) |
| ) |
| (norm): LlamaRMSNorm() |
| ) |
| (lm_head): Linear(in_features=4096, out_features=128264, bias=False) |
| ) |
| ) |
| ) |
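Editor's note: Embedding(128264, 4096) and the lm_head's out_features=128264 above are 8 rows larger than config.vocab_size (128256), matching the earlier "Special tokens have been added" warning. A hypothetical reconstruction, with token names assumed and `tokenizer`/`model` in scope:

```python
extra = [f"<|reserved_token_{i}|>" for i in range(8)]  # token names assumed
tokenizer.add_special_tokens({"additional_special_tokens": extra})
model.resize_token_embeddings(len(tokenizer))  # grows 128256 -> 128264
```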
| > /root/meta_weight_llm/less/train/train.py(192)main() |
| -> if analysis_dataset is not None: |
| (Pdb) 187 ), |
| 188 ) |
| 189 import pdb; pdb.set_trace() |
| 190 |
| 191 # Initial evaluation |
| 192 -> if analysis_dataset is not None: |
| 193 trainer.evaluate(analysis_dataset) |
| 194 |
| 195 # Training |
| 196 train_result = trainer.train() |
| 197 trainer.save_model( |
| (Pdb) [INFO|hub.py:748] 2024-06-03 09:32:39,136 >> Uploading the following files to adminsafesign/SafeSign-8B-Instruct-Test: README.md,adapter_model.safetensors,adapter_config.json |
| adapter_model.safetensors: 100%|██████████| 218M/218M [00:08<00:00, 26.3MB/s] |
| CommitInfo(commit_url='https://huggingface.co/adminsafesign/SafeSign-8B-Instruct-Test/commit/240fb3f8035f99be74d327b191a26fde6c606f26', commit_message='Upload model', commit_description='', oid='240fb3f8035f99be74d327b191a26fde6c606f26', pr_url=None, pr_revision=None, pr_num=None) |
| (Pdb) <bound method Trainer.push_to_hub of <transformers.trainer.Trainer object at 0x7fb9a8326140>> |
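Editor's note: entering trainer.push_to_hub at the (Pdb) prompt without parentheses only echoes the bound method, as seen above; the call form is what performs the upload:

```python
# At the pdb prompt:
#   trainer.push_to_hub     -> prints "<bound method Trainer.push_to_hub ...>"
#   trainer.push_to_hub()   -> actually commits and uploads
trainer.push_to_hub()
```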
| (Pdb) [INFO|trainer.py:2889] 2024-06-03 09:33:21,306 >> Saving model checkpoint to final_out/llama3-8b-instruct-final-less-lora-everything |
| [INFO|tokenization_utils_base.py:2432] 2024-06-03 09:33:23,479 >> tokenizer config file saved in final_out/llama3-8b-instruct-final-less-lora-everything/tokenizer_config.json |
| [INFO|tokenization_utils_base.py:2441] 2024-06-03 09:33:23,490 >> Special tokens file saved in final_out/llama3-8b-instruct-final-less-lora-everything/special_tokens_map.json |
| [INFO|modelcard.py:452] 2024-06-03 09:33:23,882 >> Dropping the following result as it does not have all the necessary fields: |
| {} |
| training_args.bin: 100%|██████████| 5.11k/5.11k [00:00<00:00, 32.0kB/s] |
| adapter_model.safetensors: 100%|██████████| 218M/218M [00:09<00:00, 21.8MB/s] |
| Upload 2 LFS files: 100%|██████████| 2/2 [00:10<00:00, 5.12s/it] |
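Editor's note: a hedged usage sketch for the uploaded adapter, with the repo id taken from the CommitInfo line earlier in the log:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
model = PeftModel.from_pretrained(base, "adminsafesign/SafeSign-8B-Instruct-Test")
model = model.merge_and_unload()  # optional: fold the adapter into the base weights
```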