diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f1291caa259e5f4aa8c4d78e98a7188c319c882 --- /dev/null +++ b/config.yaml @@ -0,0 +1,79 @@ +base_model: mistralai/Mistral-7B-v0.1 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: caffeinatedcherrychic/cidds-agg-balanced + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 256 +sample_packing: false +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 64 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 5 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +max_steps: 500 +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 1 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.001 +fsdp: +fsdp_config: +special_tokens: + diff --git a/dmog/axolotl-test-outputs/test.output b/dmog/axolotl-test-outputs/test.output new file mode 100644 index 0000000000000000000000000000000000000000..349b80c73b3597f8c3f587eccbe02ad6a0e2a966 --- /dev/null +++ b/dmog/axolotl-test-outputs/test.output @@ -0,0 +1,5 @@ +Hello, dhruti +####### +Finetuning +/mnt/scratch/users/dhd2000/ft14 +####### diff --git a/dmog/job.error b/dmog/job.error new file mode 100644 index 
0000000000000000000000000000000000000000..edf84cacd71f51b8df914b524feaef7613361c1a --- /dev/null +++ b/dmog/job.error @@ -0,0 +1,162 @@ +mpi/openmpi/4.1.5/gcc-4.8.5 + | -- libs/gcc/system + | * --> OK + | + OK + mpi/openmpi/4.1.5/gcc-4.8.5 ... UNLOADING --> OK + libs/gcc/system ... UNLOADING --> OK +The following values were not passed to `accelerate launch` and had defaults used instead: + `--num_processes` was set to a value of `1` + `--num_machines` was set to a value of `1` + `--mixed_precision` was set to a value of `'no'` + `--dynamo_backend` was set to a value of `'no'` +To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`. + Tokenizing Prompts (num_proc=64): 0%| | 0/111 [00:00:58] [PID:30736] PyTorch version 2.1.2 available. +[2024-04-09 08:29:08,482] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-04-09 08:29:10,330] [INFO] [axolotl.normalize_config:178] [PID:30736] [RANK:0] GPU memory usage baseline: 0.000GB (+0.640GB misc) + dP dP dP + 88 88 88 + .d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88 + 88' `88 `8bd8' 88' `88 88 88' `88 88 88 + 88. .88 .d88b. 88. .88 88 88. .88 88 88 + `88888P8 dP' `dP `88888P' dP `88888P' dP dP + + + +[2024-04-09 08:29:10,707] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 /  +[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 /  +[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 /  +[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 /  +[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference. 
+[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:191] [PID:30736] [RANK:0] Unable to find prepared dataset in last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e +[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:192] [PID:30736] [RANK:0] Loading raw datasets... +[2024-04-09 08:29:10,709] [WARNING] [axolotl.load_tokenized_prepared_datasets:194] [PID:30736] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset. +[2024-04-09 08:29:10,709] [INFO] [axolotl.load_tokenized_prepared_datasets:201] [PID:30736] [RANK:0] No seed provided, using default seed of 42 +[2024-04-09 08:29:17,092] [INFO] [axolotl.load_tokenized_prepared_datasets:414] [PID:30736] [RANK:0] merging datasets +[2024-04-09 08:29:17,096] [INFO] [axolotl.log:61] [PID:30736] [RANK:0] dropping attention_mask column +[2024-04-09 08:29:18,698] [INFO] [axolotl.load_tokenized_prepared_datasets:424] [PID:30736] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e +[2024-04-09 08:29:18,755] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_tokens: 21468 +[2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] `total_supervised_tokens: 259` +[2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_steps: 62 +[2024-04-09 08:29:18,756] [INFO] [axolotl.prepare_dataset:124] [PID:30736] [RANK:0] Maximum number of steps set at 62 +[2024-04-09 08:29:18,759] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading tokenizer... 
mistralai/Mistral-7B-v0.1 +[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 /  +[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 /  +[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 /  +[2024-04-09 08:29:19,029] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 /  +[2024-04-09 08:29:19,029] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference. +[2024-04-09 08:29:19,029] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading model and peft_config... +[2024-04-09 08:29:35,702] [INFO] [axolotl.load_model:660] [PID:30736] [RANK:0] GPU memory usage after model load: 4.342GB (+0.138GB cache, +0.942GB misc) +[2024-04-09 08:29:35,711] [INFO] [axolotl.load_model:701] [PID:30736] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training +[2024-04-09 08:29:35,713] [INFO] [axolotl.load_model:710] [PID:30736] [RANK:0] converting modules to torch.bfloat16 for flash attention +[2024-04-09 08:29:35,715] [INFO] [axolotl.load_lora:825] [PID:30736] [RANK:0] found linear modules: ['up_proj', 'q_proj', 'k_proj', 'gate_proj', 'down_proj', 'o_proj', 'v_proj'] +trainable params: 83,886,080 || all params: 7,325,618,176 || trainable%: 1.1451058188485088 +[2024-04-09 08:29:36,348] [INFO] [axolotl.load_model:750] [PID:30736] [RANK:0] GPU memory usage after adapters: 4.670GB (+0.935GB cache, +0.942GB misc) +[2024-04-09 08:29:36,446] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Pre-saving adapter config to ./qlora-out +[2024-04-09 08:29:36,459] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Starting trainer... 
+{'loss': 6.6367, 'grad_norm': 102.28898620605469, 'learning_rate': 2e-05, 'epoch': 0.08} +{'eval_loss': 7.300913333892822, 'eval_runtime': 1.3523, 'eval_samples_per_second': 8.873, 'eval_steps_per_second': 4.437, 'epoch': 0.08} +[2024-04-09 08:29:44,573] [INFO] [axolotl.callbacks.on_step_end:123] [PID:30736] [RANK:0] GPU memory usage while training: 4.843GB (+1.177GB cache, +0.965GB misc) +{'loss': 7.0616, 'grad_norm': 103.4541015625, 'learning_rate': 4e-05, 'epoch': 0.16} +{'loss': 4.686, 'grad_norm': 67.47515869140625, 'learning_rate': 6e-05, 'epoch': 0.24} +{'loss': 2.3866, 'grad_norm': 72.36919403076172, 'learning_rate': 8e-05, 'epoch': 0.32} +{'eval_loss': 0.7137572169303894, 'eval_runtime': 1.3532, 'eval_samples_per_second': 8.868, 'eval_steps_per_second': 4.434, 'epoch': 0.32} +{'loss': 0.6844, 'grad_norm': 16.83085060119629, 'learning_rate': 0.0001, 'epoch': 0.4} +{'loss': 0.914, 'grad_norm': 25.897714614868164, 'learning_rate': 0.00012, 'epoch': 0.48} +{'loss': 0.63, 'grad_norm': 18.89151382446289, 'learning_rate': 0.00014, 'epoch': 0.56} +{'loss': 0.948, 'grad_norm': 27.15555763244629, 'learning_rate': 0.00016, 'epoch': 0.64} +{'eval_loss': 1.0445994138717651, 'eval_runtime': 1.356, 'eval_samples_per_second': 8.85, 'eval_steps_per_second': 4.425, 'epoch': 0.64} +{'loss': 1.0285, 'grad_norm': 20.812381744384766, 'learning_rate': 0.00018, 'epoch': 0.72} +{'loss': 1.3756, 'grad_norm': 56.3886604309082, 'learning_rate': 0.0002, 'epoch': 0.8} +{'loss': 0.5178, 'grad_norm': 6.24803352355957, 'learning_rate': 0.00019981755542233177, 'epoch': 0.88} +{'loss': 0.6822, 'grad_norm': 8.379430770874023, 'learning_rate': 0.0001992708874098054, 'epoch': 0.96} +{'eval_loss': 1.3959709405899048, 'eval_runtime': 1.3583, 'eval_samples_per_second': 8.835, 'eval_steps_per_second': 4.417, 'epoch': 0.96} +{'loss': 1.3762, 'grad_norm': 20.744348526000977, 'learning_rate': 0.00019836199069471437, 'epoch': 1.04} +{'loss': 0.5248, 'grad_norm': 4.800480842590332, 'learning_rate': 
0.0001970941817426052, 'epoch': 1.12} +{'loss': 0.8094, 'grad_norm': 11.284302711486816, 'learning_rate': 0.00019547208665085457, 'epoch': 1.2} +{'loss': 0.5222, 'grad_norm': 5.787976264953613, 'learning_rate': 0.0001935016242685415, 'epoch': 1.28} +{'eval_loss': 0.9023411870002747, 'eval_runtime': 1.3623, 'eval_samples_per_second': 8.808, 'eval_steps_per_second': 4.404, 'epoch': 1.28} +{'loss': 0.8027, 'grad_norm': 21.48629379272461, 'learning_rate': 0.00019118998459920902, 'epoch': 1.36} +{'loss': 1.7772, 'grad_norm': 38.0982666015625, 'learning_rate': 0.000188545602565321, 'epoch': 1.44} +{'loss': 0.7737, 'grad_norm': 10.824837684631348, 'learning_rate': 0.00018557812723014476, 'epoch': 1.52} +{'loss': 0.534, 'grad_norm': 9.1353120803833, 'learning_rate': 0.00018229838658936564, 'epoch': 1.6} +{'eval_loss': 0.4847445785999298, 'eval_runtime': 1.3637, 'eval_samples_per_second': 8.799, 'eval_steps_per_second': 4.4, 'epoch': 1.6} +{'loss': 0.3201, 'grad_norm': 3.8411033153533936, 'learning_rate': 0.00017871834806090501, 'epoch': 1.68} +{'loss': 2.2541, 'grad_norm': 23.888507843017578, 'learning_rate': 0.00017485107481711012, 'epoch': 1.76} +{'loss': 0.8177, 'grad_norm': 8.5956392288208, 'learning_rate': 0.00017071067811865476, 'epoch': 1.84} +{'loss': 0.4624, 'grad_norm': 3.825141191482544, 'learning_rate': 0.00016631226582407952, 'epoch': 1.92} +{'eval_loss': 0.5740255117416382, 'eval_runtime': 1.3655, 'eval_samples_per_second': 8.788, 'eval_steps_per_second': 4.394, 'epoch': 1.92} +{'loss': 0.3714, 'grad_norm': 3.558993101119995, 'learning_rate': 0.00016167188726285434, 'epoch': 2.0} +{'loss': 0.6562, 'grad_norm': 11.759211540222168, 'learning_rate': 0.00015680647467311557, 'epoch': 2.08} +{'loss': 1.5141, 'grad_norm': 96.2179183959961, 'learning_rate': 0.00015173378141776568, 'epoch': 2.16} +{'loss': 0.7753, 'grad_norm': 31.022045135498047, 'learning_rate': 0.00014647231720437686, 'epoch': 2.24} +{'eval_loss': 0.3771994113922119, 'eval_runtime': 1.3676, 
'eval_samples_per_second': 8.775, 'eval_steps_per_second': 4.387, 'epoch': 2.24} +{'loss': 0.2649, 'grad_norm': 3.5004501342773438, 'learning_rate': 0.0001410412805452757, 'epoch': 2.32} +{'loss': 0.171, 'grad_norm': 5.16464376449585, 'learning_rate': 0.00013546048870425356, 'epoch': 2.4} +{'loss': 0.9172, 'grad_norm': 25.634010314941406, 'learning_rate': 0.00012975030538552032, 'epoch': 2.48} +{'loss': 0.3324, 'grad_norm': 7.102908134460449, 'learning_rate': 0.0001239315664287558, 'epoch': 2.56} +{'eval_loss': 0.29374203085899353, 'eval_runtime': 1.3678, 'eval_samples_per_second': 8.773, 'eval_steps_per_second': 4.387, 'epoch': 2.56} +{'loss': 0.4932, 'grad_norm': 6.236325263977051, 'learning_rate': 0.0001180255037813906, 'epoch': 2.64} +{'loss': 0.1284, 'grad_norm': 4.445058345794678, 'learning_rate': 0.0001120536680255323, 'epoch': 2.72} +{'loss': 0.1547, 'grad_norm': 6.94170618057251, 'learning_rate': 0.00010603784974222861, 'epoch': 2.8} +{'loss': 0.1973, 'grad_norm': 5.656033039093018, 'learning_rate': 0.0001, 'epoch': 2.88} +{'eval_loss': 0.5674905180931091, 'eval_runtime': 1.3681, 'eval_samples_per_second': 8.771, 'eval_steps_per_second': 4.386, 'epoch': 2.88} +{'loss': 0.4884, 'grad_norm': 18.19667625427246, 'learning_rate': 9.396215025777139e-05, 'epoch': 2.96} +{'loss': 0.5526, 'grad_norm': 17.964893341064453, 'learning_rate': 8.79463319744677e-05, 'epoch': 3.04} +{'loss': 0.2116, 'grad_norm': 5.015590190887451, 'learning_rate': 8.197449621860943e-05, 'epoch': 3.12} +{'loss': 0.0843, 'grad_norm': 5.6883225440979, 'learning_rate': 7.606843357124426e-05, 'epoch': 3.2} +{'eval_loss': 0.2360386848449707, 'eval_runtime': 1.3667, 'eval_samples_per_second': 8.78, 'eval_steps_per_second': 4.39, 'epoch': 3.2} +{'loss': 0.1158, 'grad_norm': 6.636446475982666, 'learning_rate': 7.024969461447972e-05, 'epoch': 3.28} +{'loss': 0.2755, 'grad_norm': 4.405576229095459, 'learning_rate': 6.453951129574644e-05, 'epoch': 3.36} +{'loss': 0.0186, 'grad_norm': 
1.6179524660110474, 'learning_rate': 5.8958719454724346e-05, 'epoch': 3.44} +{'loss': 0.3836, 'grad_norm': 8.783114433288574, 'learning_rate': 5.3527682795623146e-05, 'epoch': 3.52} +{'eval_loss': 0.13969357311725616, 'eval_runtime': 1.3687, 'eval_samples_per_second': 8.767, 'eval_steps_per_second': 4.384, 'epoch': 3.52} +{'loss': 0.0141, 'grad_norm': 0.8835445046424866, 'learning_rate': 4.826621858223431e-05, 'epoch': 3.6} +{'loss': 0.6196, 'grad_norm': 12.678099632263184, 'learning_rate': 4.3193525326884435e-05, 'epoch': 3.68} +{'loss': 0.0948, 'grad_norm': 5.320870876312256, 'learning_rate': 3.832811273714569e-05, 'epoch': 3.76} +{'loss': 0.0449, 'grad_norm': 2.7501108646392822, 'learning_rate': 3.36877341759205e-05, 'epoch': 3.84} +{'eval_loss': 0.2801015079021454, 'eval_runtime': 1.3706, 'eval_samples_per_second': 8.755, 'eval_steps_per_second': 4.378, 'epoch': 3.84} +{'loss': 0.3026, 'grad_norm': 4.41072940826416, 'learning_rate': 2.9289321881345254e-05, 'epoch': 3.92} +{'loss': 0.0152, 'grad_norm': 1.2105910778045654, 'learning_rate': 2.514892518288988e-05, 'epoch': 4.0} +{'loss': 0.0629, 'grad_norm': 4.502895355224609, 'learning_rate': 2.1281651939094992e-05, 'epoch': 4.08} +{'loss': 0.2246, 'grad_norm': 6.058006286621094, 'learning_rate': 1.7701613410634365e-05, 'epoch': 4.16} +{'eval_loss': 0.19463467597961426, 'eval_runtime': 1.3725, 'eval_samples_per_second': 8.743, 'eval_steps_per_second': 4.372, 'epoch': 4.16} +{'loss': 0.0093, 'grad_norm': 0.5118169784545898, 'learning_rate': 1.442187276985526e-05, 'epoch': 4.24} +{'loss': 0.0148, 'grad_norm': 0.8497004508972168, 'learning_rate': 1.1454397434679021e-05, 'epoch': 4.32} +{'loss': 0.0392, 'grad_norm': 1.752151608467102, 'learning_rate': 8.810015400790994e-06, 'epoch': 4.4} +{'loss': 0.229, 'grad_norm': 3.6673429012298584, 'learning_rate': 6.498375731458528e-06, 'epoch': 4.48} +{'eval_loss': 0.16181980073451996, 'eval_runtime': 1.3705, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 
'epoch': 4.48} +{'loss': 0.1722, 'grad_norm': 2.9522616863250732, 'learning_rate': 4.527913349145441e-06, 'epoch': 4.56} +{'loss': 0.0295, 'grad_norm': 1.5037487745285034, 'learning_rate': 2.905818257394799e-06, 'epoch': 4.64} +{'loss': 0.03, 'grad_norm': 1.4181660413742065, 'learning_rate': 1.6380093052856483e-06, 'epoch': 4.72} +{'loss': 0.3073, 'grad_norm': 9.207091331481934, 'learning_rate': 7.291125901946027e-07, 'epoch': 4.8} +{'eval_loss': 0.14654164016246796, 'eval_runtime': 1.3704, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 'epoch': 4.8} +{'loss': 0.032, 'grad_norm': 1.5023337602615356, 'learning_rate': 1.824445776682504e-07, 'epoch': 4.88} +{'loss': 0.1144, 'grad_norm': 2.882874011993408, 'learning_rate': 0.0, 'epoch': 4.96} +{'train_runtime': 206.4235, 'train_samples_per_second': 2.403, 'train_steps_per_second': 0.3, 'train_loss': 0.7901421915739775, 'epoch': 4.96} +[2024-04-09 08:33:03,093] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Training Completed!!! 
Saving pre-trained model to ./qlora-out +(PeftModelForCausalLM( (base_model): LoraModel( (model): MistralForCausalLM( (model): MistralModel( (embed_tokens): Embedding(32000, 4096) (layers): ModuleList( (0-31): 32 x MistralDecoderLayer( (self_attn): MistralFlashAttention2( (q_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (k_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=1024, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (v_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=1024, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (o_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (rotary_emb): 
MistralRotaryEmbedding() ) (mlp): MistralMLP( (gate_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=14336, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (up_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=14336, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (down_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=14336, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=14336, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (act_fn): SiLU() ) (input_layernorm): MistralRMSNorm() (post_attention_layernorm): MistralRMSNorm() ) ) (norm): MistralRMSNorm() ) (lm_head): Linear(in_features=4096, out_features=32000, bias=False) ) ) ), LlamaTokenizer(name_or_path='mistralai/Mistral-7B-v0.1', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='left', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': '', 'pad_token': ''}, clean_up_tokenization_spaces=False), added_tokens_decoder={ 0: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), 1: AddedToken("", 
rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), 2: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), }) + +End of job +Output file has been generated, please check /mnt/scratch/users/dhd2000/ft14/dmog/axolotl-test-outputs/test.output diff --git a/finetune-test.py b/finetune-test.py new file mode 100644 index 0000000000000000000000000000000000000000..74209cc87c69bbdec83674e631ada945646fe780 --- /dev/null +++ b/finetune-test.py @@ -0,0 +1,90 @@
+# This script is used to test the model using a dataset
+#
+# Each line of the input JSONL file must be an object with an "input" field
+# (the traffic flow) and an "output" field (the expected label). Every
+# prediction is printed, followed by a final TP/TN/FP/FN tally in which any
+# answer that does not contain "normal" is treated as the positive class.
+
+# Import the necessary libraries
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from langchain.memory import ConversationBufferWindowMemory
+from peft import PeftModel
+import torch
+
+import json
+import sys
+
+# Check if the correct number of arguments are provided
+if len(sys.argv) != 2:
+    print("Usage: python finetune-test.py JSONL_FILE_PATH")
+    sys.exit(1)
+
+# Get the file path from the command-line argument
+jsonl_file_path = sys.argv[1]
+
+# Load the model and tokenizer
+# NOTE(review): config.yaml fine-tunes mistralai/Mistral-7B-v0.1 into ./qlora-out,
+# but the adapter is attached to the Instruct-v0.2 checkpoint here - confirm
+# that this base-model mismatch is intentional.
+base_model = "mistralai/Mistral-7B-Instruct-v0.2"
+tokenizer = AutoTokenizer.from_pretrained(base_model)
+# Pad with EOS rather than adding a new "[PAD]" token: a new token would grow
+# the tokenizer vocabulary while the model's embeddings are never resized.
+tokenizer.pad_token = tokenizer.eos_token
+base_model = AutoModelForCausalLM.from_pretrained(base_model)
+
+
+ft_model = PeftModel.from_pretrained(base_model, "./qlora-out")
+# ft_model = ft_model.merge_and_unload()
+ft_model.eval()
+
+# Set the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ft_model.to(device)
+
+# Read the JSONL file
+with open(jsonl_file_path, "r") as f:
+    tp, tn, fp, fn = 0, 0, 0, 0
+    for line in f:
+        # Skip blank lines (e.g. a trailing newline at the end of the file)
+        if not line.strip():
+            continue
+        data = json.loads(line)
+        user_in = data["input"]
+        # Normalize the label the same way as the response so they can be compared
+        expected = data["output"].strip().lower()
+        user_input = f"[INST] ###instruction: Check if the given traffic flow is normal or of an attacker or a victim\n###input: {user_in}\n#output: [/INST]"
+        encodings = tokenizer(user_input, return_tensors="pt", padding=True).to(device)
+        input_ids = encodings["input_ids"]
+        attention_mask = encodings["attention_mask"]
+
+        # NOTE(review): the expected labels look like single words, so max_new_tokens=1000 is likely far more than needed - consider lowering.
+        output_ids = ft_model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=1000, num_return_sequences=1, do_sample=True, temperature=0.1, top_p=0.9)
+
+        # Keep only the newly generated tokens, dropping the prompt
+        generated_ids = output_ids[0, input_ids.shape[-1]:]
+
+        # Decode the output
+        response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip().lower()
+
+        # calculate true positive, true negative, false positive, false negative ("positive" = answer is not "normal")
+        predicted_normal = "normal" in response
+        is_correct = expected == response
+        if is_correct and not predicted_normal:
+            tp += 1
+        elif is_correct:
+            tn += 1
+        elif predicted_normal:
+            # Wrongly answered "normal": a missed attack, i.e. a false negative
+            fn += 1
+        else:
+            # Wrong non-"normal" answer: a false positive (this bucket also
+            # absorbs attacker/victim mix-ups and unexpected outputs)
+            fp += 1
+
+        print(f"User input: {user_in}")
+        print(f"Generated response: {response}")
+        print(f"Expected response: {data['output']}")
+        print()
+
+    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
diff --git a/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..ff851bfb1c72204dcf880903821ff82e61b24cb7 --- /dev/null +++ b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a8a6b98277d114990fb441a27d2f777773005e6b7cf57a0ec219fe3bae40b1 +size 259336 diff --git a/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..a5ae2ce8394d91f7bafa3b7fbfd4bc2c8915a991 --- /dev/null +++ b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json @@ -0,0 +1,22 @@ +{ + "citation": "", + "description": "", + "features": { + "input_ids": { + "feature": { + "dtype": "int32", + "_type": "Value" + }, +
"_type": "Sequence" + }, + "labels": { + "feature": { + "dtype": "int64", + "_type": "Value" + }, + "_type": "Sequence" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json new file mode 100644 index 0000000000000000000000000000000000000000..6da163c36a5f5f2d39bd4e6261f547e9e0b68adc --- /dev/null +++ b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json @@ -0,0 +1,16 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "992b9317aa372e8e", + "_format_columns": [ + "input_ids", + "labels" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22d9925a1b7e16b24e211b3a7c199c8f72785516 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml @@ -0,0 +1,15 @@ +artifact_uri: file:///mnt/scratch/users/dhd2000/ft14/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/artifacts +end_time: 1712647983089 +entry_point_name: '' +experiment_id: '0' +lifecycle_stage: active +run_id: 7e75ece8e18e485db64e4e2d9196e738 +run_name: ./qlora-out +run_uuid: 7e75ece8e18e485db64e4e2d9196e738 +source_name: '' +source_type: 4 +source_version: '' +start_time: 1712647776681 +status: 3 +tags: [] +user_id: dhd2000 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch new file mode 100644 index 0000000000000000000000000000000000000000..f432878f3791e0923ec15be54205a5aaf4841476 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch @@ -0,0 +1,79 @@ +1712647780322 0.08 1 +1712647781687 0.08 1 +1712647784574 0.16 2 +1712647787465 0.24 
3 +1712647790359 0.32 4 +1712647791728 0.32 4 +1712647794625 0.4 5 +1712647797520 0.48 6 +1712647800416 0.56 7 +1712647803312 0.64 8 +1712647804683 0.64 8 +1712647807578 0.72 9 +1712647810474 0.8 10 +1712647813372 0.88 11 +1712647816270 0.96 12 +1712647817642 0.96 12 +1712647820386 1.04 13 +1712647824221 1.12 14 +1712647827138 1.2 15 +1712647830042 1.28 16 +1712647831420 1.28 16 +1712647834333 1.36 17 +1712647837242 1.44 18 +1712647840147 1.52 19 +1712647843070 1.6 20 +1712647844448 1.6 20 +1712647847363 1.68 21 +1712647850290 1.76 22 +1712647853203 1.84 23 +1712647856116 1.92 24 +1712647857496 1.92 24 +1712647860252 2.0 25 +1712647863163 2.08 26 +1712647866855 2.16 27 +1712647869769 2.24 28 +1712647871150 2.24 28 +1712647874087 2.32 29 +1712647877006 2.4 30 +1712647879921 2.48 31 +1712647882836 2.56 32 +1712647884219 2.56 32 +1712647887139 2.64 33 +1712647890053 2.72 34 +1712647892967 2.8 35 +1712647895882 2.88 36 +1712647897265 2.88 36 +1712647900187 2.96 37 +1712647902945 3.04 38 +1712647905861 3.12 39 +1712647909552 3.2 40 +1712647910933 3.2 40 +1712647913855 3.28 41 +1712647916777 3.36 42 +1712647919694 3.44 43 +1712647922609 3.52 44 +1712647923992 3.52 44 +1712647926911 3.6 45 +1712647929833 3.68 46 +1712647932754 3.76 47 +1712647935680 3.84 48 +1712647937065 3.84 48 +1712647939993 3.92 49 +1712647942758 4.0 50 +1712647945681 4.08 51 +1712647948606 4.16 52 +1712647949993 4.16 52 +1712647953856 4.24 53 +1712647956779 4.32 54 +1712647959701 4.4 55 +1712647962622 4.48 56 +1712647964007 4.48 56 +1712647966930 4.56 57 +1712647969855 4.64 58 +1712647972779 4.72 59 +1712647975702 4.8 60 +1712647977087 4.8 60 +1712647980013 4.88 61 +1712647982936 4.96 62 +1712647983084 4.96 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss new file mode 100644 index 0000000000000000000000000000000000000000..d1a66dddbc3ca9a74013351553978d43683b213e --- /dev/null +++ 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss @@ -0,0 +1,16 @@ +1712647781687 7.300913333892822 1 +1712647791728 0.7137572169303894 4 +1712647804683 1.0445994138717651 8 +1712647817642 1.3959709405899048 12 +1712647831420 0.9023411870002747 16 +1712647844448 0.4847445785999298 20 +1712647857496 0.5740255117416382 24 +1712647871150 0.3771994113922119 28 +1712647884219 0.29374203085899353 32 +1712647897265 0.5674905180931091 36 +1712647910933 0.2360386848449707 40 +1712647923992 0.13969357311725616 44 +1712647937065 0.2801015079021454 48 +1712647949993 0.19463467597961426 52 +1712647964007 0.16181980073451996 56 +1712647977087 0.14654164016246796 60 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime new file mode 100644 index 0000000000000000000000000000000000000000..ef189ef6fa07aab3fa46dbc931de687ffe239493 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime @@ -0,0 +1,16 @@ +1712647781687 1.3523 1 +1712647791728 1.3532 4 +1712647804683 1.356 8 +1712647817642 1.3583 12 +1712647831420 1.3623 16 +1712647844448 1.3637 20 +1712647857496 1.3655 24 +1712647871150 1.3676 28 +1712647884219 1.3678 32 +1712647897265 1.3681 36 +1712647910933 1.3667 40 +1712647923992 1.3687 44 +1712647937065 1.3706 48 +1712647949993 1.3725 52 +1712647964007 1.3705 56 +1712647977087 1.3704 60 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second new file mode 100644 index 0000000000000000000000000000000000000000..0a5b33b314313207171558aa0073e4afa518c935 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second @@ -0,0 +1,16 @@ +1712647781687 8.873 1 +1712647791728 8.868 4 +1712647804683 8.85 8 +1712647817642 8.835 12 +1712647831420 8.808 16 +1712647844448 8.799 20 +1712647857496 8.788 24 +1712647871150 8.775 28 
+1712647884219 8.773 32 +1712647897265 8.771 36 +1712647910933 8.78 40 +1712647923992 8.767 44 +1712647937065 8.755 48 +1712647949993 8.743 52 +1712647964007 8.756 56 +1712647977087 8.756 60 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second new file mode 100644 index 0000000000000000000000000000000000000000..e39c6f2c898cf08fa4d5b51b4bed95021f37aa7a --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second @@ -0,0 +1,16 @@ +1712647781687 4.437 1 +1712647791728 4.434 4 +1712647804683 4.425 8 +1712647817642 4.417 12 +1712647831420 4.404 16 +1712647844448 4.4 20 +1712647857496 4.394 24 +1712647871150 4.387 28 +1712647884219 4.387 32 +1712647897265 4.386 36 +1712647910933 4.39 40 +1712647923992 4.384 44 +1712647937065 4.378 48 +1712647949993 4.372 52 +1712647964007 4.378 56 +1712647977087 4.378 60 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm new file mode 100644 index 0000000000000000000000000000000000000000..16b0d51cc03f1651d0d04c2865800e79fd25e238 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm @@ -0,0 +1,62 @@ +1712647780322 102.28898620605469 1 +1712647784574 103.4541015625 2 +1712647787465 67.47515869140625 3 +1712647790359 72.36919403076172 4 +1712647794625 16.83085060119629 5 +1712647797520 25.897714614868164 6 +1712647800416 18.89151382446289 7 +1712647803312 27.15555763244629 8 +1712647807578 20.812381744384766 9 +1712647810474 56.3886604309082 10 +1712647813372 6.24803352355957 11 +1712647816270 8.379430770874023 12 +1712647820386 20.744348526000977 13 +1712647824221 4.800480842590332 14 +1712647827138 11.284302711486816 15 +1712647830042 5.787976264953613 16 +1712647834333 21.48629379272461 17 +1712647837242 38.0982666015625 18 +1712647840147 10.824837684631348 19 +1712647843070 9.1353120803833 20 
+1712647847363 3.8411033153533936 21 +1712647850290 23.888507843017578 22 +1712647853203 8.5956392288208 23 +1712647856116 3.825141191482544 24 +1712647860252 3.558993101119995 25 +1712647863163 11.759211540222168 26 +1712647866855 96.2179183959961 27 +1712647869769 31.022045135498047 28 +1712647874087 3.5004501342773438 29 +1712647877006 5.16464376449585 30 +1712647879921 25.634010314941406 31 +1712647882836 7.102908134460449 32 +1712647887139 6.236325263977051 33 +1712647890053 4.445058345794678 34 +1712647892967 6.94170618057251 35 +1712647895882 5.656033039093018 36 +1712647900187 18.19667625427246 37 +1712647902945 17.964893341064453 38 +1712647905861 5.015590190887451 39 +1712647909552 5.6883225440979 40 +1712647913855 6.636446475982666 41 +1712647916777 4.405576229095459 42 +1712647919694 1.6179524660110474 43 +1712647922609 8.783114433288574 44 +1712647926911 0.8835445046424866 45 +1712647929833 12.678099632263184 46 +1712647932754 5.320870876312256 47 +1712647935680 2.7501108646392822 48 +1712647939993 4.41072940826416 49 +1712647942758 1.2105910778045654 50 +1712647945681 4.502895355224609 51 +1712647948606 6.058006286621094 52 +1712647953856 0.5118169784545898 53 +1712647956779 0.8497004508972168 54 +1712647959701 1.752151608467102 55 +1712647962622 3.6673429012298584 56 +1712647966930 2.9522616863250732 57 +1712647969855 1.5037487745285034 58 +1712647972779 1.4181660413742065 59 +1712647975702 9.207091331481934 60 +1712647980013 1.5023337602615356 61 +1712647982936 2.882874011993408 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate new file mode 100644 index 0000000000000000000000000000000000000000..9ab174ff7c3a8e1b7bc8aef1e35c90f9f4ed54fa --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate @@ -0,0 +1,62 @@ +1712647780322 2e-05 1 +1712647784574 4e-05 2 +1712647787465 6e-05 3 +1712647790359 8e-05 4 +1712647794625 0.0001 5 
+1712647797520 0.00012 6 +1712647800416 0.00014 7 +1712647803312 0.00016 8 +1712647807578 0.00018 9 +1712647810474 0.0002 10 +1712647813372 0.00019981755542233177 11 +1712647816270 0.0001992708874098054 12 +1712647820386 0.00019836199069471437 13 +1712647824221 0.0001970941817426052 14 +1712647827138 0.00019547208665085457 15 +1712647830042 0.0001935016242685415 16 +1712647834333 0.00019118998459920902 17 +1712647837242 0.000188545602565321 18 +1712647840147 0.00018557812723014476 19 +1712647843070 0.00018229838658936564 20 +1712647847363 0.00017871834806090501 21 +1712647850290 0.00017485107481711012 22 +1712647853203 0.00017071067811865476 23 +1712647856116 0.00016631226582407952 24 +1712647860252 0.00016167188726285434 25 +1712647863163 0.00015680647467311557 26 +1712647866855 0.00015173378141776568 27 +1712647869769 0.00014647231720437686 28 +1712647874087 0.0001410412805452757 29 +1712647877006 0.00013546048870425356 30 +1712647879921 0.00012975030538552032 31 +1712647882836 0.0001239315664287558 32 +1712647887139 0.0001180255037813906 33 +1712647890053 0.0001120536680255323 34 +1712647892967 0.00010603784974222861 35 +1712647895882 0.0001 36 +1712647900187 9.396215025777139e-05 37 +1712647902945 8.79463319744677e-05 38 +1712647905861 8.197449621860943e-05 39 +1712647909552 7.606843357124426e-05 40 +1712647913855 7.024969461447972e-05 41 +1712647916777 6.453951129574644e-05 42 +1712647919694 5.8958719454724346e-05 43 +1712647922609 5.3527682795623146e-05 44 +1712647926911 4.826621858223431e-05 45 +1712647929833 4.3193525326884435e-05 46 +1712647932754 3.832811273714569e-05 47 +1712647935680 3.36877341759205e-05 48 +1712647939993 2.9289321881345254e-05 49 +1712647942758 2.514892518288988e-05 50 +1712647945681 2.1281651939094992e-05 51 +1712647948606 1.7701613410634365e-05 52 +1712647953856 1.442187276985526e-05 53 +1712647956779 1.1454397434679021e-05 54 +1712647959701 8.810015400790994e-06 55 +1712647962622 6.498375731458528e-06 56 +1712647966930 
4.527913349145441e-06 57 +1712647969855 2.905818257394799e-06 58 +1712647972779 1.6380093052856483e-06 59 +1712647975702 7.291125901946027e-07 60 +1712647980013 1.824445776682504e-07 61 +1712647982936 0.0 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss new file mode 100644 index 0000000000000000000000000000000000000000..dc235c2bc6e146990750afd93cdd9e06c9311240 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss @@ -0,0 +1,62 @@ +1712647780322 6.6367 1 +1712647784574 7.0616 2 +1712647787465 4.686 3 +1712647790359 2.3866 4 +1712647794625 0.6844 5 +1712647797520 0.914 6 +1712647800416 0.63 7 +1712647803312 0.948 8 +1712647807578 1.0285 9 +1712647810474 1.3756 10 +1712647813372 0.5178 11 +1712647816270 0.6822 12 +1712647820386 1.3762 13 +1712647824221 0.5248 14 +1712647827138 0.8094 15 +1712647830042 0.5222 16 +1712647834333 0.8027 17 +1712647837242 1.7772 18 +1712647840147 0.7737 19 +1712647843070 0.534 20 +1712647847363 0.3201 21 +1712647850290 2.2541 22 +1712647853203 0.8177 23 +1712647856116 0.4624 24 +1712647860252 0.3714 25 +1712647863163 0.6562 26 +1712647866855 1.5141 27 +1712647869769 0.7753 28 +1712647874087 0.2649 29 +1712647877006 0.171 30 +1712647879921 0.9172 31 +1712647882836 0.3324 32 +1712647887139 0.4932 33 +1712647890053 0.1284 34 +1712647892967 0.1547 35 +1712647895882 0.1973 36 +1712647900187 0.4884 37 +1712647902945 0.5526 38 +1712647905861 0.2116 39 +1712647909552 0.0843 40 +1712647913855 0.1158 41 +1712647916777 0.2755 42 +1712647919694 0.0186 43 +1712647922609 0.3836 44 +1712647926911 0.0141 45 +1712647929833 0.6196 46 +1712647932754 0.0948 47 +1712647935680 0.0449 48 +1712647939993 0.3026 49 +1712647942758 0.0152 50 +1712647945681 0.0629 51 +1712647948606 0.2246 52 +1712647953856 0.0093 53 +1712647956779 0.0148 54 +1712647959701 0.0392 55 +1712647962622 0.229 56 +1712647966930 0.1722 57 +1712647969855 0.0295 58 +1712647972779 0.03 59 
+1712647975702 0.3073 60 +1712647980013 0.032 61 +1712647982936 0.1144 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos new file mode 100644 index 0000000000000000000000000000000000000000..8d2fbbae034ee18b6a3a59f5cb591adcf3a59c57 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos @@ -0,0 +1 @@ +1712647983084 5437004879757312.0 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss new file mode 100644 index 0000000000000000000000000000000000000000..e2a63c3c09e7fa944bdf92085bd9ee06f239180b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss @@ -0,0 +1 @@ +1712647983084 0.7901421915739775 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime new file mode 100644 index 0000000000000000000000000000000000000000..26e1cedad25e987991767e7433a00d5cc262339b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime @@ -0,0 +1 @@ +1712647983084 206.4235 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second new file mode 100644 index 0000000000000000000000000000000000000000..a778a2ad05b9b53b0322670acdb01e9f3f946fb6 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second @@ -0,0 +1 @@ +1712647983084 2.403 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second new file mode 100644 index 0000000000000000000000000000000000000000..1a9f2bb98bcad50d595796ff7acabd42c744bccd --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second @@ -0,0 +1 @@ 
+1712647983084 0.3 62 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path new file mode 100644 index 0000000000000000000000000000000000000000..33107fdfe860165b41c9d861e55966a6f48293db --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path @@ -0,0 +1 @@ +mistralai/Mistral-7B-v0.1 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config new file mode 100644 index 0000000000000000000000000000000000000000..a2cbdd730f27b5f9f8452fbf3e14cef98c90535f --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config @@ -0,0 +1 @@ +{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1 new file mode 100644 index 0000000000000000000000000000000000000000..9a7d84f2a96bb56f53bfc3a42ac10d06459e55c3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1 @@ -0,0 +1 @@ +0.9 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2 new file mode 100644 index 0000000000000000000000000000000000000000..79cbfdf0652c46b13ed8946e54aa94ff7bdd44ab --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2 @@ -0,0 +1 
@@ +0.999 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon new file mode 100644 index 0000000000000000000000000000000000000000..851199be9c9a0b8c721d7f305f5af1759637102d --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon @@ -0,0 +1 @@ +1e-08 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures new file mode 100644 index 0000000000000000000000000000000000000000..c27f306c1b84f17dbf9aa36d723a7f328b56229f --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures @@ -0,0 +1 @@ +['MistralForCausalLM'] \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout new file mode 100644 index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout @@ -0,0 +1 @@ +0.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size @@ -0,0 +1 @@ +False \ No 
newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset new file mode 100644 index 0000000000000000000000000000000000000000..5372119fb56f731e6e0979f74a8912782bc37aba --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset @@ -0,0 +1 @@ +pharaouk/dharma-1/dharma_1_mini.json \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len new file mode 100644 index 0000000000000000000000000000000000000000..f3e53ee118f90809468f69873ccb9d675089cd74 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len @@ -0,0 +1 @@ +2048 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split new file mode 100644 index 0000000000000000000000000000000000000000..08f17520cfb44b4cb37639a20e3a4e65e1b1dfa3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split @@ -0,0 +1 @@ +eval \ No newline at end of 
file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16 new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16 @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id new file mode 100644 index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio @@ -0,0 +1 @@ +None \ No newline at end of file diff --git 
a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers @@ -0,0 +1 @@ +0 \ No newline at end of file diff 
--git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_backend new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_backend @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_broadcast_buffers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_broadcast_buffers new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_broadcast_buffers @@ -0,0 +1 
@@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_bucket_cap_mb b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_bucket_cap_mb new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_bucket_cap_mb @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_find_unused_parameters b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_find_unused_parameters new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_find_unused_parameters @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_timeout b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_timeout new file mode 100644 index 0000000000000000000000000000000000000000..2974009fd3610ead1e61c1cd20f510a934dd6f91 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_timeout @@ -0,0 +1 @@ +1800 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/debug b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/debug new file mode 100644 index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/debug @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/decoder_start_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/decoder_start_token_id new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/decoder_start_token_id @@ -0,0 +1 @@ +None \ No newline at end of file diff 
--git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/deepspeed b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/deepspeed new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/deepspeed @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/disable_tqdm b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/disable_tqdm new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/disable_tqdm @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dispatch_batches b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dispatch_batches new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dispatch_batches @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/diversity_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/diversity_penalty new file mode 100644 index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/diversity_penalty @@ -0,0 +1 @@ +0.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_bench_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_bench_eval new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_bench_eval @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_causal_lm_eval 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_causal_lm_eval new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_causal_lm_eval @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_eval new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_eval @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_predict b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_predict new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_predict @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_sample b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_sample new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_sample @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_train b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_train new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_train @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/early_stopping b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/early_stopping new file mode 100644 index 
0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/early_stopping @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/encoder_no_repeat_ngram_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/encoder_no_repeat_ngram_size new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/encoder_no_repeat_ngram_size @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eos_token_id new file mode 100644 index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eos_token_id @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_accumulation_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_accumulation_steps new file mode 100644 index 0000000000000000000000000000000000000000..bf0d87ab1b2b0ec1a11a3973d2845b42413d9767 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_accumulation_steps @@ -0,0 +1 @@ +4 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_delay b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_delay new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_delay @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_sample_packing b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_sample_packing new file mode 100644 index 
0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_sample_packing @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_steps new file mode 100644 index 0000000000000000000000000000000000000000..30e2fb4d6f4f74b013fc93cbbd90b37101df4148 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_steps @@ -0,0 +1 @@ +0.05 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/evaluation_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/evaluation_strategy new file mode 100644 index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/evaluation_strategy @@ -0,0 +1 @@ +steps \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/exponential_decay_length_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/exponential_decay_length_penalty new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/exponential_decay_length_penalty @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/finetuning_task b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/finetuning_task new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/finetuning_task @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_bos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_bos_token_id new file mode 100644 
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_bos_token_id @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_eos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_eos_token_id new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_eos_token_id @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16 new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16 @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_backend new file mode 100644 index 0000000000000000000000000000000000000000..4d18c3e59ecf5c28b46b06ce26f2406b2d449870 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_backend @@ -0,0 +1 @@ +auto \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_full_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_full_eval new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_full_eval @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_opt_level b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_opt_level new file mode 100644 index 0000000000000000000000000000000000000000..a9ada426ac8819467c6dc392dcbea40183a3e16e 
--- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_opt_level @@ -0,0 +1 @@ +O1 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp new file mode 100644 index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_config b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_config new file mode 100644 index 0000000000000000000000000000000000000000..9d33480169a14dfac929530aefc3cd1f5776a983 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_config @@ -0,0 +1 @@ +{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_min_num_params b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_min_num_params new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_min_num_params @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_transformer_layer_cls_to_wrap b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_transformer_layer_cls_to_wrap new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_transformer_layer_cls_to_wrap @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/full_determinism b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/full_determinism new file mode 100644 index 
0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/full_determinism @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_accumulation_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_accumulation_steps new file mode 100644 index 0000000000000000000000000000000000000000..bf0d87ab1b2b0ec1a11a3973d2845b42413d9767 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_accumulation_steps @@ -0,0 +1 @@ +4 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing_kwargs b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing_kwargs new file mode 100644 index 0000000000000000000000000000000000000000..e111fdd6e27ffe7ae81c6da50ec9db10030c98f5 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing_kwargs @@ -0,0 +1 @@ +{'use_reentrant': True} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/greater_is_better b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/greater_is_better new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/greater_is_better @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/group_by_length 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/group_by_length new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/group_by_length @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/half_precision_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/half_precision_backend new file mode 100644 index 0000000000000000000000000000000000000000..4d18c3e59ecf5c28b46b06ce26f2406b2d449870 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/half_precision_backend @@ -0,0 +1 @@ +auto \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_act b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_act new file mode 100644 index 0000000000000000000000000000000000000000..84972cd9564e61cac416981cb71bb1e176046f68 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_act @@ -0,0 +1 @@ +silu \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_size new file mode 100644 index 0000000000000000000000000000000000000000..1b18a99c9b4c83c582cf696ed55b1c1d79776fa2 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_size @@ -0,0 +1 @@ +4096 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_always_push b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_always_push new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_always_push @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_model_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_model_id 
new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_model_id @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_private_repo b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_private_repo new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_private_repo @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_strategy new file mode 100644 index 0000000000000000000000000000000000000000..8532b12ca8add8fe61b84623fab9d559a366ce3c --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_strategy @@ -0,0 +1 @@ +every_save \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_token b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_token new file mode 100644 index 0000000000000000000000000000000000000000..0a574a354979ef783f5f4fe08c3595f79596ff41 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_token @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/id2label b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/id2label new file mode 100644 index 0000000000000000000000000000000000000000..74c276dcae370126a18f5657c0e1ed72e72325e9 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/id2label @@ -0,0 +1 @@ +{0: 'LABEL_0', 1: 'LABEL_1'} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ignore_data_skip b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ignore_data_skip new file mode 100644 index 
0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ignore_data_skip @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_inputs_for_metrics b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_inputs_for_metrics new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_inputs_for_metrics @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_num_input_tokens_seen b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_num_input_tokens_seen new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_num_input_tokens_seen @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_tokens_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_tokens_per_second new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_tokens_per_second @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/initializer_range b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/initializer_range new file mode 100644 index 0000000000000000000000000000000000000000..79dd775c1e90ab736c362ede2f2332678eccf47e --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/initializer_range @@ -0,0 +1 @@ +0.02 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/intermediate_size 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/intermediate_size new file mode 100644 index 0000000000000000000000000000000000000000..5a65be5c17b7f3ef8c6237c21e0efc9b8a59f1ae --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/intermediate_size @@ -0,0 +1 @@ +14336 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_decoder b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_decoder new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_decoder @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_encoder_decoder b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_encoder_decoder new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_encoder_decoder @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/jit_mode_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/jit_mode_eval new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/jit_mode_eval @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label2id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label2id new file mode 100644 index 0000000000000000000000000000000000000000..0589857be5c3ad7b568bf7c79a4172a5aa887693 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label2id @@ -0,0 +1 @@ +{'LABEL_0': 0, 'LABEL_1': 1} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_names b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_names 
new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_names @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_smoothing_factor b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_smoothing_factor new file mode 100644 index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_smoothing_factor @@ -0,0 +1 @@ +0.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/learning_rate b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/learning_rate new file mode 100644 index 0000000000000000000000000000000000000000..4c7a7cb48c8bda5e544d360d959f8ef5e7f5778f --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/learning_rate @@ -0,0 +1 @@ +0.0002 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_column_name b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_column_name new file mode 100644 index 0000000000000000000000000000000000000000..c2e7ec839dabf14d5d59f187c6b8fdb3460872aa --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_column_name @@ -0,0 +1 @@ +length \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_penalty new file mode 100644 index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_penalty @@ -0,0 +1 @@ +1.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/load_best_model_at_end b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/load_best_model_at_end new file mode 100644 index 
0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/load_best_model_at_end @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/local_rank b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/local_rank new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/local_rank @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level new file mode 100644 index 0000000000000000000000000000000000000000..ecf328558d66d304c19bdd373f647085a3f0880d --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level @@ -0,0 +1 @@ +passive \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level_replica b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level_replica new file mode 100644 index 0000000000000000000000000000000000000000..14b472df8d4481c6fea79c066ae4650980f02b7c --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level_replica @@ -0,0 +1 @@ +warning \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_on_each_node b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_on_each_node new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_on_each_node @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_dir b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_dir new file mode 100644 index 
0000000000000000000000000000000000000000..0b8b53e9a3065f67eedc90d6329f1f560efab3a1 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_dir @@ -0,0 +1 @@ +./qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_first_step b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_first_step new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_first_step @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_nan_inf_filter b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_nan_inf_filter new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_nan_inf_filter @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_steps new file mode 100644 index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_steps @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_strategy new file mode 100644 index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_strategy @@ -0,0 +1 @@ +steps \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_embedding 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_embedding new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_embedding @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_ratio new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_ratio @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_quadratic_warmup b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_quadratic_warmup new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_quadratic_warmup @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_kwargs b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_kwargs new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_kwargs @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_type b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_type new file mode 100644 index 0000000000000000000000000000000000000000..84aa3999b5b7cae7f78b1f77e04d182643005a92 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_type @@ -0,0 +1 @@ +cosine \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_bench_samples 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_bench_samples new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_bench_samples @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_grad_norm b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_grad_norm new file mode 100644 index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_grad_norm @@ -0,0 +1 @@ +1.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_length b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_length new file mode 100644 index 0000000000000000000000000000000000000000..2edeafb09db0093bae6ff060e2dcd2166f5c9387 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_length @@ -0,0 +1 @@ +20 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_position_embeddings b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_position_embeddings new file mode 100644 index 0000000000000000000000000000000000000000..2707c481ad6f123a2d2f15fe38c2bbcf3c32af4b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_position_embeddings @@ -0,0 +1 @@ +32768 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_seq_length b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_seq_length new file mode 100644 index 0000000000000000000000000000000000000000..ae4d10b425edf2234036e6dd7b07f9bd53fc25e3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_seq_length @@ -0,0 +1 @@ +256 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_steps new 
file mode 100644 index 0000000000000000000000000000000000000000..b2412e34dff05e77952f3f930772631cac5a3be7 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_steps @@ -0,0 +1 @@ +62 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/metric_for_best_model b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/metric_for_best_model new file mode 100644 index 0000000000000000000000000000000000000000..c476ffb61d3613d976546da2231ec877269c04d6 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/metric_for_best_model @@ -0,0 +1 @@ +loss \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/min_length b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/min_length new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/min_length @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/model_type b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/model_type new file mode 100644 index 0000000000000000000000000000000000000000..757dcc3dde6402fef065090ce481dc3cdf34bea1 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/model_type @@ -0,0 +1 @@ +mistral \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/mp_parameters b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/mp_parameters new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/multipack_real_batches b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/multipack_real_batches new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/multipack_real_batches @@ 
-0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/neftune_noise_alpha b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/neftune_noise_alpha new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/neftune_noise_alpha @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_cuda b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_cuda new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_cuda @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_repeat_ngram_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_repeat_ngram_size new file mode 100644 index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_repeat_ngram_size @@ -0,0 +1 @@ +0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_attention_heads b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_attention_heads new file mode 100644 index 0000000000000000000000000000000000000000..1758dddccea2b3b02d21228a0d06a45a35c0d861 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_attention_heads @@ -0,0 +1 @@ +32 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beam_groups b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beam_groups new file mode 100644 index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beam_groups @@ -0,0 +1 @@ +1 \ No newline at end of file diff 
--git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beams b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beams new file mode 100644 index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beams @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_hidden_layers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_hidden_layers new file mode 100644 index 0000000000000000000000000000000000000000..1758dddccea2b3b02d21228a0d06a45a35c0d861 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_hidden_layers @@ -0,0 +1 @@ +32 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_key_value_heads b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_key_value_heads new file mode 100644 index 0000000000000000000000000000000000000000..301160a93062df23030a69f4b5e4d9bf71866ee9 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_key_value_heads @@ -0,0 +1 @@ +8 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_return_sequences b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_return_sequences new file mode 100644 index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_return_sequences @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_train_epochs b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_train_epochs new file mode 100644 index 0000000000000000000000000000000000000000..7813681f5b41c028345ca62a2be376bae70b7f61 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_train_epochs @@ -0,0 +1 @@ +5 \ No newline at end of file diff --git 
a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim new file mode 100644 index 0000000000000000000000000000000000000000..8b5daf75e0cece259f6a6b603b14f124b2cda697 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim @@ -0,0 +1 @@ +adamw_bnb_8bit \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim_args b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim_args new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim_args @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_attentions b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_attentions new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_attentions @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_dir b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_dir new file mode 100644 index 0000000000000000000000000000000000000000..99ea28c0280583d6cba05d7aaf0cc98dbfb10fa1 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_dir @@ -0,0 +1 @@ +./qlora-out \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_hidden_states b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_hidden_states new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_hidden_states @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_scores 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_scores new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_scores @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/overwrite_output_dir b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/overwrite_output_dir new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/overwrite_output_dir @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pad_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pad_token_id new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pad_token_id @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/past_index b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/past_index new file mode 100644 index 0000000000000000000000000000000000000000..d7d17fcbef95ca19081c4cc5e97cbc592cc7081f --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/past_index @@ -0,0 +1 @@ +-1 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_eval_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_eval_batch_size new file mode 100644 index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_eval_batch_size @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_train_batch_size 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_train_batch_size new file mode 100644 index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_train_batch_size @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_eval_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_eval_batch_size new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_eval_batch_size @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_train_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_train_batch_size new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_train_batch_size @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prediction_loss_only b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prediction_loss_only new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prediction_loss_only @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prefix b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prefix new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prefix @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pretraining 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pretraining new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pretraining @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/problem_type b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/problem_type new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/problem_type @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pruned_heads b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pruned_heads new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pruned_heads @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_model_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_model_id new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_model_id @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_organization 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_organization new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_organization @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_token b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_token new file mode 100644 index 0000000000000000000000000000000000000000..36e61093756f7c43b24cd50fc63164c08bcf50f1 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_token @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/quantization_config b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/quantization_config new file mode 100644 index 0000000000000000000000000000000000000000..adf6c34c60d0846bc5fa1f297f34a50e0a26f8b1 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/quantization_config @@ -0,0 +1 @@ +{'quant_method': , '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'bfloat16', 'load_in_4bit': True, 'load_in_8bit': False} \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ray_scope b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ray_scope new file mode 100644 index 0000000000000000000000000000000000000000..1c1206e8bf4337e96dad9a6d139628852077558d --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ray_scope @@ -0,0 +1 @@ +last \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_anneal_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_anneal_steps new file mode 
100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_anneal_steps @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_prune_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_prune_ratio new file mode 100644 index 0000000000000000000000000000000000000000..9a7d84f2a96bb56f53bfc3a42ac10d06459e55c3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_prune_ratio @@ -0,0 +1 @@ +0.9 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_steps new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_steps @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_warmup_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_warmup_steps new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_warmup_steps @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_invalid_values b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_invalid_values new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_invalid_values @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_unused_columns b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_unused_columns new file mode 100644 index 
0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_unused_columns @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/repetition_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/repetition_penalty new file mode 100644 index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/repetition_penalty @@ -0,0 +1 @@ +1.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/report_to b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/report_to new file mode 100644 index 0000000000000000000000000000000000000000..b4c01973f975b264ce9a4952bbeaa1f1b8bdb018 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/report_to @@ -0,0 +1 @@ +['mlflow', 'tensorboard'] \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/resume_from_checkpoint b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/resume_from_checkpoint new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/resume_from_checkpoint @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict_in_generate b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict_in_generate new file mode 100644 index 
0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict_in_generate @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rms_norm_eps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rms_norm_eps new file mode 100644 index 0000000000000000000000000000000000000000..5868ff147459cee04c24f2de58e75969024870b8 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rms_norm_eps @@ -0,0 +1 @@ +1e-05 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rope_theta b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rope_theta new file mode 100644 index 0000000000000000000000000000000000000000..5e3692287a7d36338465cfdf2af01373b923e614 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rope_theta @@ -0,0 +1 @@ +10000.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/run_name b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/run_name new file mode 100644 index 0000000000000000000000000000000000000000..99ea28c0280583d6cba05d7aaf0cc98dbfb10fa1 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/run_name @@ -0,0 +1 @@ +./qlora-out \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_efficiency b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_efficiency new file mode 100644 index 
0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_efficiency @@ -0,0 +1 @@ +1.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_seq_len_multiplier b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_seq_len_multiplier new file mode 100644 index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_seq_len_multiplier @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_on_each_node b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_on_each_node new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_on_each_node @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_only_model b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_only_model new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_only_model @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_safetensors b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_safetensors @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_steps new file mode 100644 
index 0000000000000000000000000000000000000000..2f4536184bcac31936bd15a5f9cf931dd526c022 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_steps @@ -0,0 +1 @@ +0.2 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_strategy new file mode 100644 index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_strategy @@ -0,0 +1 @@ +steps \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_total_limit b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_total_limit new file mode 100644 index 0000000000000000000000000000000000000000..bf0d87ab1b2b0ec1a11a3973d2845b42413d9767 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_total_limit @@ -0,0 +1 @@ +4 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/seed b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/seed new file mode 100644 index 0000000000000000000000000000000000000000..f70d7bba4ae1f07682e0358bd7a2068094fc023b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/seed @@ -0,0 +1 @@ +42 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sep_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sep_token_id new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sep_token_id @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/skip_memory_metrics b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/skip_memory_metrics new file mode 100644 index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876 --- /dev/null +++ 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/skip_memory_metrics @@ -0,0 +1 @@ +True \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sliding_window b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sliding_window new file mode 100644 index 0000000000000000000000000000000000000000..1b18a99c9b4c83c582cf696ed55b1c1d79776fa2 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sliding_window @@ -0,0 +1 @@ +4096 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/split_batches b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/split_batches new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/split_batches @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/suppress_tokens b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/suppress_tokens new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/suppress_tokens @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/task_specific_params b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/task_specific_params new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/task_specific_params @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/temperature b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/temperature new file mode 100644 index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba --- /dev/null +++ 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/temperature @@ -0,0 +1 @@ +1.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf32 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf32 new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf32 @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf_legacy_loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf_legacy_loss new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf_legacy_loss @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_encoder_decoder b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_encoder_decoder new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_encoder_decoder @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_word_embeddings b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_word_embeddings new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_word_embeddings @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tokenizer_class b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tokenizer_class new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tokenizer_class @@ 
-0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_k b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_k new file mode 100644 index 0000000000000000000000000000000000000000..c5b431b6cba29540b4b284840ff229bce0460886 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_k @@ -0,0 +1 @@ +50 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_p b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_p new file mode 100644 index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_p @@ -0,0 +1 @@ +1.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_backend new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_backend @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_mode b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_mode new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_mode @@ -0,0 +1 @@ +None \ No newline at end of file diff --git 
a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_dtype b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_dtype new file mode 100644 index 0000000000000000000000000000000000000000..8481ec0098496c454d11e66437510c620f01aa78 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_dtype @@ -0,0 +1 @@ +bfloat16 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchdynamo b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchdynamo new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchdynamo @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchscript b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchscript new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchscript @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_metrics_debug b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_metrics_debug new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_metrics_debug @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_num_cores b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_num_cores new file mode 100644 index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_num_cores @@ -0,0 +1 @@ +None \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/transformers_version 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/transformers_version new file mode 100644 index 0000000000000000000000000000000000000000..9ba2e2253c54c56d823046083e9f5b13cd908bce --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/transformers_version @@ -0,0 +1 @@ +4.39.0.dev0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/typical_p b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/typical_p new file mode 100644 index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/typical_p @@ -0,0 +1 @@ +1.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_bfloat16 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_bfloat16 new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_bfloat16 @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cache b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cache new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cache @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cpu b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cpu new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cpu @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_ipex b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_ipex new file mode 100644 index 
0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_ipex @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_legacy_prediction_loop b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_legacy_prediction_loop new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_legacy_prediction_loop @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_mps_device b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_mps_device new file mode 100644 index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_mps_device @@ -0,0 +1 @@ +False \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/vocab_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/vocab_size new file mode 100644 index 0000000000000000000000000000000000000000..be79d9de6ef46aa65d12681dc5186fd34ea022dc --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/vocab_size @@ -0,0 +1 @@ +32000 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_ratio new file mode 100644 index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_ratio @@ -0,0 +1 @@ +0.0 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_steps new file mode 100644 index 
0000000000000000000000000000000000000000..9a037142aa3c1b4c490e1a38251620f113465330 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_steps @@ -0,0 +1 @@ +10 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/weight_decay b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/weight_decay new file mode 100644 index 0000000000000000000000000000000000000000..eb5a1db868251c6a5c775b49efde91a5ec3205df --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/weight_decay @@ -0,0 +1 @@ +0.001 \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.runName b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.runName new file mode 100644 index 0000000000000000000000000000000000000000..99ea28c0280583d6cba05d7aaf0cc98dbfb10fa1 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.runName @@ -0,0 +1 @@ +./qlora-out \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.git.commit b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.git.commit new file mode 100644 index 0000000000000000000000000000000000000000..4fc1b7e7153b36dcfda203e96431b2788ace8884 --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.git.commit @@ -0,0 +1 @@ +8984bf17226f3abc4080d0e3decc28ff1d70178b \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.name b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.name new file mode 100644 index 0000000000000000000000000000000000000000..b7588b4c16a4783516407529e743d1a6a58ae99c --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.name @@ -0,0 +1 @@ +/users/dhd2000/axolotl/src/axolotl/cli/train.py \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.type 
b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.type new file mode 100644 index 0000000000000000000000000000000000000000..0c2c1fe9dc63b7040bb81006635e50fd528f056f --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.type @@ -0,0 +1 @@ +LOCAL \ No newline at end of file diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.user b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.user new file mode 100644 index 0000000000000000000000000000000000000000..7d966af2758f0b52651d138ee5ec43fb59151a4d --- /dev/null +++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.user @@ -0,0 +1 @@ +dhd2000 \ No newline at end of file diff --git a/mlruns/0/meta.yaml b/mlruns/0/meta.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bf7f4eba3a8df361eaca45f4a023a4c1dedc754 --- /dev/null +++ b/mlruns/0/meta.yaml @@ -0,0 +1,6 @@ +artifact_location: file:///mnt/scratch/users/dhd2000/ft14/mlruns/0 +creation_time: 1712647776665 +experiment_id: '0' +last_update_time: 1712647776665 +lifecycle_stage: active +name: Default diff --git a/qlora-out/README.md b/qlora-out/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ebea70108b15703a03e6b25ee9a9f435857a67be --- /dev/null +++ b/qlora-out/README.md @@ -0,0 +1,166 @@ +--- +license: apache-2.0 +library_name: peft +tags: +- generated_from_trainer +base_model: mistralai/Mistral-7B-v0.1 +model-index: +- name: qlora-out + results: [] +--- + + + +[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) +
See axolotl config + +axolotl version: `0.4.0` +```yaml +base_model: mistralai/Mistral-7B-v0.1 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: caffeinatedcherrychic/cidds-agg-balanced + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 256 +sample_packing: false +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 64 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 5 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +max_steps: 500 +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 1 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.001 +fsdp: +fsdp_config: +special_tokens: + + +``` + +

+ +# qlora-out + +This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the caffeinatedcherrychic/cidds-agg-balanced dataset. +It achieves the following results on the evaluation set: +- Loss: 0.1465 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0002 +- train_batch_size: 2 +- eval_batch_size: 2 +- seed: 42 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 8 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 10 +- training_steps: 62 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:-----:|:----:|:---------------:| +| 6.6367 | 0.08 | 1 | 7.3009 | +| 2.3866 | 0.32 | 4 | 0.7138 | +| 0.948 | 0.64 | 8 | 1.0446 | +| 0.6822 | 0.96 | 12 | 1.3960 | +| 0.5222 | 1.28 | 16 | 0.9023 | +| 0.534 | 1.6 | 20 | 0.4847 | +| 0.4624 | 1.92 | 24 | 0.5740 | +| 0.7753 | 2.24 | 28 | 0.3772 | +| 0.3324 | 2.56 | 32 | 0.2937 | +| 0.1973 | 2.88 | 36 | 0.5675 | +| 0.0843 | 3.2 | 40 | 0.2360 | +| 0.3836 | 3.52 | 44 | 0.1397 | +| 0.0449 | 3.84 | 48 | 0.2801 | +| 0.2246 | 4.16 | 52 | 0.1946 | +| 0.229 | 4.48 | 56 | 0.1618 | +| 0.3073 | 4.8 | 60 | 0.1465 | + + +### Framework versions + +- PEFT 0.10.1.dev0 +- Transformers 4.39.0.dev0 +- Pytorch 2.1.2 +- Datasets 2.18.0 +- Tokenizers 0.15.0 \ No newline at end of file diff --git a/qlora-out/adapter_config.json b/qlora-out/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281 --- /dev/null +++ b/qlora-out/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", 
+ "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "gate_proj", + "o_proj", + "down_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/qlora-out/adapter_model.bin b/qlora-out/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f2392f76445b62e1e189b4ce3580a6e1cf85d81 --- /dev/null +++ b/qlora-out/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef894f6daf736ab4a35fe0fba96204d34d3a179661233fc32771e92bcb515b0d +size 335706186 diff --git a/qlora-out/checkpoint-13/README.md b/qlora-out/checkpoint-13/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28 --- /dev/null +++ b/qlora-out/checkpoint-13/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### 
Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.10.1.dev0 \ No newline at end of file diff --git a/qlora-out/checkpoint-13/adapter_config.json b/qlora-out/checkpoint-13/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281 --- /dev/null +++ b/qlora-out/checkpoint-13/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "gate_proj", + "o_proj", + "down_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git 
a/qlora-out/checkpoint-13/adapter_model.safetensors b/qlora-out/checkpoint-13/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b17e6811e3c4a3d38c6d918307672461b4b20c6c --- /dev/null +++ b/qlora-out/checkpoint-13/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72069b2abc2e8e408822bca99f6492f6272dff7f199d0afff420f28fdcde57ab +size 335604696 diff --git a/qlora-out/checkpoint-13/optimizer.pt b/qlora-out/checkpoint-13/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a69b0f63a51467febdd838d8915b95f3b9ef3dc --- /dev/null +++ b/qlora-out/checkpoint-13/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff099a7ecc6bc7c04d5f8fd80d2443dd9f492cb12877c91fe4ea29066d9dd08 +size 168624724 diff --git a/qlora-out/checkpoint-13/rng_state.pth b/qlora-out/checkpoint-13/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..06b070741c8d2998636045e4a082ef320b192eef --- /dev/null +++ b/qlora-out/checkpoint-13/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fd0abf3b25d5f521218bb97508206369e6984af4f556dd58b22d5dfbbb6425 +size 14244 diff --git a/qlora-out/checkpoint-13/scheduler.pt b/qlora-out/checkpoint-13/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0d874288e76b9ca8cadf690dd2ac36327360d2b --- /dev/null +++ b/qlora-out/checkpoint-13/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d3b6aff690f8457dc46d75813d9f660109e8ec63e2dc8cbf92e4d726c3a8a8c +size 1064 diff --git a/qlora-out/checkpoint-13/trainer_state.json b/qlora-out/checkpoint-13/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c1194d1e012f34b5a299f9b1b6742b666ad61ef9 --- /dev/null +++ b/qlora-out/checkpoint-13/trainer_state.json @@ -0,0 +1,144 @@ +{ + "best_metric": null, + 
"best_model_checkpoint": null, + "epoch": 1.04, + "eval_steps": 4, + "global_step": 13, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 102.28898620605469, + "learning_rate": 2e-05, + "loss": 6.6367, + "step": 1 + }, + { + "epoch": 0.08, + "eval_loss": 7.300913333892822, + "eval_runtime": 1.3523, + "eval_samples_per_second": 8.873, + "eval_steps_per_second": 4.437, + "step": 1 + }, + { + "epoch": 0.16, + "grad_norm": 103.4541015625, + "learning_rate": 4e-05, + "loss": 7.0616, + "step": 2 + }, + { + "epoch": 0.24, + "grad_norm": 67.47515869140625, + "learning_rate": 6e-05, + "loss": 4.686, + "step": 3 + }, + { + "epoch": 0.32, + "grad_norm": 72.36919403076172, + "learning_rate": 8e-05, + "loss": 2.3866, + "step": 4 + }, + { + "epoch": 0.32, + "eval_loss": 0.7137572169303894, + "eval_runtime": 1.3532, + "eval_samples_per_second": 8.868, + "eval_steps_per_second": 4.434, + "step": 4 + }, + { + "epoch": 0.4, + "grad_norm": 16.83085060119629, + "learning_rate": 0.0001, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.48, + "grad_norm": 25.897714614868164, + "learning_rate": 0.00012, + "loss": 0.914, + "step": 6 + }, + { + "epoch": 0.56, + "grad_norm": 18.89151382446289, + "learning_rate": 0.00014, + "loss": 0.63, + "step": 7 + }, + { + "epoch": 0.64, + "grad_norm": 27.15555763244629, + "learning_rate": 0.00016, + "loss": 0.948, + "step": 8 + }, + { + "epoch": 0.64, + "eval_loss": 1.0445994138717651, + "eval_runtime": 1.356, + "eval_samples_per_second": 8.85, + "eval_steps_per_second": 4.425, + "step": 8 + }, + { + "epoch": 0.72, + "grad_norm": 20.812381744384766, + "learning_rate": 0.00018, + "loss": 1.0285, + "step": 9 + }, + { + "epoch": 0.8, + "grad_norm": 56.3886604309082, + "learning_rate": 0.0002, + "loss": 1.3756, + "step": 10 + }, + { + "epoch": 0.88, + "grad_norm": 6.24803352355957, + "learning_rate": 0.00019981755542233177, + "loss": 0.5178, + "step": 
11 + }, + { + "epoch": 0.96, + "grad_norm": 8.379430770874023, + "learning_rate": 0.0001992708874098054, + "loss": 0.6822, + "step": 12 + }, + { + "epoch": 0.96, + "eval_loss": 1.3959709405899048, + "eval_runtime": 1.3583, + "eval_samples_per_second": 8.835, + "eval_steps_per_second": 4.417, + "step": 12 + }, + { + "epoch": 1.04, + "grad_norm": 20.744348526000977, + "learning_rate": 0.00019836199069471437, + "loss": 1.3762, + "step": 13 + } + ], + "logging_steps": 1, + "max_steps": 62, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 13, + "total_flos": 1138234761412608.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/qlora-out/checkpoint-13/training_args.bin b/qlora-out/checkpoint-13/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1 --- /dev/null +++ b/qlora-out/checkpoint-13/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace +size 5624 diff --git a/qlora-out/checkpoint-26/README.md b/qlora-out/checkpoint-26/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28 --- /dev/null +++ b/qlora-out/checkpoint-26/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More 
Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.10.1.dev0 \ No newline at end of file diff --git a/qlora-out/checkpoint-26/adapter_config.json b/qlora-out/checkpoint-26/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281 --- /dev/null +++ b/qlora-out/checkpoint-26/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "gate_proj", + "o_proj", + "down_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git 
a/qlora-out/checkpoint-26/adapter_model.safetensors b/qlora-out/checkpoint-26/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ab45672da31dcc0e038b019c87497f1f36920c7 --- /dev/null +++ b/qlora-out/checkpoint-26/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387499c7736d8b7c5cab21843d9b986ad31e4777afa1c953e254a6b821622ab8 +size 335604696 diff --git a/qlora-out/checkpoint-26/optimizer.pt b/qlora-out/checkpoint-26/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bac5a0d811c0aa5ff83105457ef69a16b347d02d --- /dev/null +++ b/qlora-out/checkpoint-26/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26885c89f597923fecf6d91cf382dfac6eeea66972dd286bb6316360fd0bb69 +size 168624724 diff --git a/qlora-out/checkpoint-26/rng_state.pth b/qlora-out/checkpoint-26/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7ee0ba33d923fb061a56c5fc191e36bf2407d83f --- /dev/null +++ b/qlora-out/checkpoint-26/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69726c1b60735ec075cbe9ef238868d0b5845ade6b93bfd60e810fcee5f233a5 +size 14244 diff --git a/qlora-out/checkpoint-26/scheduler.pt b/qlora-out/checkpoint-26/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b71c24dad62dfe21c7661f7b95351662ad638d28 --- /dev/null +++ b/qlora-out/checkpoint-26/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22f4d3e17b1ff1ac5db395ab84ba067bc34a07791275897d3efe0cf1944d439 +size 1064 diff --git a/qlora-out/checkpoint-26/trainer_state.json b/qlora-out/checkpoint-26/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f678315072628755e901826a950d986954b8def7 --- /dev/null +++ b/qlora-out/checkpoint-26/trainer_state.json @@ -0,0 +1,259 @@ +{ + "best_metric": null, + 
"best_model_checkpoint": null, + "epoch": 2.08, + "eval_steps": 4, + "global_step": 26, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 102.28898620605469, + "learning_rate": 2e-05, + "loss": 6.6367, + "step": 1 + }, + { + "epoch": 0.08, + "eval_loss": 7.300913333892822, + "eval_runtime": 1.3523, + "eval_samples_per_second": 8.873, + "eval_steps_per_second": 4.437, + "step": 1 + }, + { + "epoch": 0.16, + "grad_norm": 103.4541015625, + "learning_rate": 4e-05, + "loss": 7.0616, + "step": 2 + }, + { + "epoch": 0.24, + "grad_norm": 67.47515869140625, + "learning_rate": 6e-05, + "loss": 4.686, + "step": 3 + }, + { + "epoch": 0.32, + "grad_norm": 72.36919403076172, + "learning_rate": 8e-05, + "loss": 2.3866, + "step": 4 + }, + { + "epoch": 0.32, + "eval_loss": 0.7137572169303894, + "eval_runtime": 1.3532, + "eval_samples_per_second": 8.868, + "eval_steps_per_second": 4.434, + "step": 4 + }, + { + "epoch": 0.4, + "grad_norm": 16.83085060119629, + "learning_rate": 0.0001, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.48, + "grad_norm": 25.897714614868164, + "learning_rate": 0.00012, + "loss": 0.914, + "step": 6 + }, + { + "epoch": 0.56, + "grad_norm": 18.89151382446289, + "learning_rate": 0.00014, + "loss": 0.63, + "step": 7 + }, + { + "epoch": 0.64, + "grad_norm": 27.15555763244629, + "learning_rate": 0.00016, + "loss": 0.948, + "step": 8 + }, + { + "epoch": 0.64, + "eval_loss": 1.0445994138717651, + "eval_runtime": 1.356, + "eval_samples_per_second": 8.85, + "eval_steps_per_second": 4.425, + "step": 8 + }, + { + "epoch": 0.72, + "grad_norm": 20.812381744384766, + "learning_rate": 0.00018, + "loss": 1.0285, + "step": 9 + }, + { + "epoch": 0.8, + "grad_norm": 56.3886604309082, + "learning_rate": 0.0002, + "loss": 1.3756, + "step": 10 + }, + { + "epoch": 0.88, + "grad_norm": 6.24803352355957, + "learning_rate": 0.00019981755542233177, + "loss": 0.5178, + "step": 
11 + }, + { + "epoch": 0.96, + "grad_norm": 8.379430770874023, + "learning_rate": 0.0001992708874098054, + "loss": 0.6822, + "step": 12 + }, + { + "epoch": 0.96, + "eval_loss": 1.3959709405899048, + "eval_runtime": 1.3583, + "eval_samples_per_second": 8.835, + "eval_steps_per_second": 4.417, + "step": 12 + }, + { + "epoch": 1.04, + "grad_norm": 20.744348526000977, + "learning_rate": 0.00019836199069471437, + "loss": 1.3762, + "step": 13 + }, + { + "epoch": 1.12, + "grad_norm": 4.800480842590332, + "learning_rate": 0.0001970941817426052, + "loss": 0.5248, + "step": 14 + }, + { + "epoch": 1.2, + "grad_norm": 11.284302711486816, + "learning_rate": 0.00019547208665085457, + "loss": 0.8094, + "step": 15 + }, + { + "epoch": 1.28, + "grad_norm": 5.787976264953613, + "learning_rate": 0.0001935016242685415, + "loss": 0.5222, + "step": 16 + }, + { + "epoch": 1.28, + "eval_loss": 0.9023411870002747, + "eval_runtime": 1.3623, + "eval_samples_per_second": 8.808, + "eval_steps_per_second": 4.404, + "step": 16 + }, + { + "epoch": 1.36, + "grad_norm": 21.48629379272461, + "learning_rate": 0.00019118998459920902, + "loss": 0.8027, + "step": 17 + }, + { + "epoch": 1.44, + "grad_norm": 38.0982666015625, + "learning_rate": 0.000188545602565321, + "loss": 1.7772, + "step": 18 + }, + { + "epoch": 1.52, + "grad_norm": 10.824837684631348, + "learning_rate": 0.00018557812723014476, + "loss": 0.7737, + "step": 19 + }, + { + "epoch": 1.6, + "grad_norm": 9.1353120803833, + "learning_rate": 0.00018229838658936564, + "loss": 0.534, + "step": 20 + }, + { + "epoch": 1.6, + "eval_loss": 0.4847445785999298, + "eval_runtime": 1.3637, + "eval_samples_per_second": 8.799, + "eval_steps_per_second": 4.4, + "step": 20 + }, + { + "epoch": 1.68, + "grad_norm": 3.8411033153533936, + "learning_rate": 0.00017871834806090501, + "loss": 0.3201, + "step": 21 + }, + { + "epoch": 1.76, + "grad_norm": 23.888507843017578, + "learning_rate": 0.00017485107481711012, + "loss": 2.2541, + "step": 22 + }, + { + "epoch": 
1.84, + "grad_norm": 8.5956392288208, + "learning_rate": 0.00017071067811865476, + "loss": 0.8177, + "step": 23 + }, + { + "epoch": 1.92, + "grad_norm": 3.825141191482544, + "learning_rate": 0.00016631226582407952, + "loss": 0.4624, + "step": 24 + }, + { + "epoch": 1.92, + "eval_loss": 0.5740255117416382, + "eval_runtime": 1.3655, + "eval_samples_per_second": 8.788, + "eval_steps_per_second": 4.394, + "step": 24 + }, + { + "epoch": 2.0, + "grad_norm": 3.558993101119995, + "learning_rate": 0.00016167188726285434, + "loss": 0.3714, + "step": 25 + }, + { + "epoch": 2.08, + "grad_norm": 11.759211540222168, + "learning_rate": 0.00015680647467311557, + "loss": 0.6562, + "step": 26 + } + ], + "logging_steps": 1, + "max_steps": 62, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 13, + "total_flos": 2276469522825216.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/qlora-out/checkpoint-26/training_args.bin b/qlora-out/checkpoint-26/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1 --- /dev/null +++ b/qlora-out/checkpoint-26/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace +size 5624 diff --git a/qlora-out/checkpoint-39/README.md b/qlora-out/checkpoint-39/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28 --- /dev/null +++ b/qlora-out/checkpoint-39/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More 
Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.10.1.dev0 \ No newline at end of file diff --git a/qlora-out/checkpoint-39/adapter_config.json b/qlora-out/checkpoint-39/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281 --- /dev/null +++ b/qlora-out/checkpoint-39/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "gate_proj", + "o_proj", + "down_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git 
a/qlora-out/checkpoint-39/adapter_model.safetensors b/qlora-out/checkpoint-39/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55ee4532d97b81f281b67e51ca9f85074606ed3b --- /dev/null +++ b/qlora-out/checkpoint-39/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b8939dee1c9d7c76fb429805ca8dd1be67417b78ad3ae2622ce37f2a7294d6 +size 335604696 diff --git a/qlora-out/checkpoint-39/optimizer.pt b/qlora-out/checkpoint-39/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..69c87fb68c98328af299c4b05221da1b5858daf7 --- /dev/null +++ b/qlora-out/checkpoint-39/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c9a9efa8eced911795343502191b7b9044f8b5aa46a6f27343859276faacbc +size 168624724 diff --git a/qlora-out/checkpoint-39/rng_state.pth b/qlora-out/checkpoint-39/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5de01e34c268e2e9de291103c166f1d1e9371ae8 --- /dev/null +++ b/qlora-out/checkpoint-39/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83b87057617d19867b72bb4f1d7769198abfb127e1bef7a626c1e07b9dee3f2 +size 14244 diff --git a/qlora-out/checkpoint-39/scheduler.pt b/qlora-out/checkpoint-39/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..235fe106cb564e3df82a4f47fcfa5ebb0a72efd2 --- /dev/null +++ b/qlora-out/checkpoint-39/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8d987b7fe563f350e72415c21199e03eb1c8b092374967d449229a0b0fa9b1 +size 1064 diff --git a/qlora-out/checkpoint-39/trainer_state.json b/qlora-out/checkpoint-39/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cb5e20ec1701c09095f855610376f99c2c833bbc --- /dev/null +++ b/qlora-out/checkpoint-39/trainer_state.json @@ -0,0 +1,374 @@ +{ + "best_metric": null, + 
"best_model_checkpoint": null, + "epoch": 3.12, + "eval_steps": 4, + "global_step": 39, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 102.28898620605469, + "learning_rate": 2e-05, + "loss": 6.6367, + "step": 1 + }, + { + "epoch": 0.08, + "eval_loss": 7.300913333892822, + "eval_runtime": 1.3523, + "eval_samples_per_second": 8.873, + "eval_steps_per_second": 4.437, + "step": 1 + }, + { + "epoch": 0.16, + "grad_norm": 103.4541015625, + "learning_rate": 4e-05, + "loss": 7.0616, + "step": 2 + }, + { + "epoch": 0.24, + "grad_norm": 67.47515869140625, + "learning_rate": 6e-05, + "loss": 4.686, + "step": 3 + }, + { + "epoch": 0.32, + "grad_norm": 72.36919403076172, + "learning_rate": 8e-05, + "loss": 2.3866, + "step": 4 + }, + { + "epoch": 0.32, + "eval_loss": 0.7137572169303894, + "eval_runtime": 1.3532, + "eval_samples_per_second": 8.868, + "eval_steps_per_second": 4.434, + "step": 4 + }, + { + "epoch": 0.4, + "grad_norm": 16.83085060119629, + "learning_rate": 0.0001, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.48, + "grad_norm": 25.897714614868164, + "learning_rate": 0.00012, + "loss": 0.914, + "step": 6 + }, + { + "epoch": 0.56, + "grad_norm": 18.89151382446289, + "learning_rate": 0.00014, + "loss": 0.63, + "step": 7 + }, + { + "epoch": 0.64, + "grad_norm": 27.15555763244629, + "learning_rate": 0.00016, + "loss": 0.948, + "step": 8 + }, + { + "epoch": 0.64, + "eval_loss": 1.0445994138717651, + "eval_runtime": 1.356, + "eval_samples_per_second": 8.85, + "eval_steps_per_second": 4.425, + "step": 8 + }, + { + "epoch": 0.72, + "grad_norm": 20.812381744384766, + "learning_rate": 0.00018, + "loss": 1.0285, + "step": 9 + }, + { + "epoch": 0.8, + "grad_norm": 56.3886604309082, + "learning_rate": 0.0002, + "loss": 1.3756, + "step": 10 + }, + { + "epoch": 0.88, + "grad_norm": 6.24803352355957, + "learning_rate": 0.00019981755542233177, + "loss": 0.5178, + "step": 
11 + }, + { + "epoch": 0.96, + "grad_norm": 8.379430770874023, + "learning_rate": 0.0001992708874098054, + "loss": 0.6822, + "step": 12 + }, + { + "epoch": 0.96, + "eval_loss": 1.3959709405899048, + "eval_runtime": 1.3583, + "eval_samples_per_second": 8.835, + "eval_steps_per_second": 4.417, + "step": 12 + }, + { + "epoch": 1.04, + "grad_norm": 20.744348526000977, + "learning_rate": 0.00019836199069471437, + "loss": 1.3762, + "step": 13 + }, + { + "epoch": 1.12, + "grad_norm": 4.800480842590332, + "learning_rate": 0.0001970941817426052, + "loss": 0.5248, + "step": 14 + }, + { + "epoch": 1.2, + "grad_norm": 11.284302711486816, + "learning_rate": 0.00019547208665085457, + "loss": 0.8094, + "step": 15 + }, + { + "epoch": 1.28, + "grad_norm": 5.787976264953613, + "learning_rate": 0.0001935016242685415, + "loss": 0.5222, + "step": 16 + }, + { + "epoch": 1.28, + "eval_loss": 0.9023411870002747, + "eval_runtime": 1.3623, + "eval_samples_per_second": 8.808, + "eval_steps_per_second": 4.404, + "step": 16 + }, + { + "epoch": 1.36, + "grad_norm": 21.48629379272461, + "learning_rate": 0.00019118998459920902, + "loss": 0.8027, + "step": 17 + }, + { + "epoch": 1.44, + "grad_norm": 38.0982666015625, + "learning_rate": 0.000188545602565321, + "loss": 1.7772, + "step": 18 + }, + { + "epoch": 1.52, + "grad_norm": 10.824837684631348, + "learning_rate": 0.00018557812723014476, + "loss": 0.7737, + "step": 19 + }, + { + "epoch": 1.6, + "grad_norm": 9.1353120803833, + "learning_rate": 0.00018229838658936564, + "loss": 0.534, + "step": 20 + }, + { + "epoch": 1.6, + "eval_loss": 0.4847445785999298, + "eval_runtime": 1.3637, + "eval_samples_per_second": 8.799, + "eval_steps_per_second": 4.4, + "step": 20 + }, + { + "epoch": 1.68, + "grad_norm": 3.8411033153533936, + "learning_rate": 0.00017871834806090501, + "loss": 0.3201, + "step": 21 + }, + { + "epoch": 1.76, + "grad_norm": 23.888507843017578, + "learning_rate": 0.00017485107481711012, + "loss": 2.2541, + "step": 22 + }, + { + "epoch": 
1.84, + "grad_norm": 8.5956392288208, + "learning_rate": 0.00017071067811865476, + "loss": 0.8177, + "step": 23 + }, + { + "epoch": 1.92, + "grad_norm": 3.825141191482544, + "learning_rate": 0.00016631226582407952, + "loss": 0.4624, + "step": 24 + }, + { + "epoch": 1.92, + "eval_loss": 0.5740255117416382, + "eval_runtime": 1.3655, + "eval_samples_per_second": 8.788, + "eval_steps_per_second": 4.394, + "step": 24 + }, + { + "epoch": 2.0, + "grad_norm": 3.558993101119995, + "learning_rate": 0.00016167188726285434, + "loss": 0.3714, + "step": 25 + }, + { + "epoch": 2.08, + "grad_norm": 11.759211540222168, + "learning_rate": 0.00015680647467311557, + "loss": 0.6562, + "step": 26 + }, + { + "epoch": 2.16, + "grad_norm": 96.2179183959961, + "learning_rate": 0.00015173378141776568, + "loss": 1.5141, + "step": 27 + }, + { + "epoch": 2.24, + "grad_norm": 31.022045135498047, + "learning_rate": 0.00014647231720437686, + "loss": 0.7753, + "step": 28 + }, + { + "epoch": 2.24, + "eval_loss": 0.3771994113922119, + "eval_runtime": 1.3676, + "eval_samples_per_second": 8.775, + "eval_steps_per_second": 4.387, + "step": 28 + }, + { + "epoch": 2.32, + "grad_norm": 3.5004501342773438, + "learning_rate": 0.0001410412805452757, + "loss": 0.2649, + "step": 29 + }, + { + "epoch": 2.4, + "grad_norm": 5.16464376449585, + "learning_rate": 0.00013546048870425356, + "loss": 0.171, + "step": 30 + }, + { + "epoch": 2.48, + "grad_norm": 25.634010314941406, + "learning_rate": 0.00012975030538552032, + "loss": 0.9172, + "step": 31 + }, + { + "epoch": 2.56, + "grad_norm": 7.102908134460449, + "learning_rate": 0.0001239315664287558, + "loss": 0.3324, + "step": 32 + }, + { + "epoch": 2.56, + "eval_loss": 0.29374203085899353, + "eval_runtime": 1.3678, + "eval_samples_per_second": 8.773, + "eval_steps_per_second": 4.387, + "step": 32 + }, + { + "epoch": 2.64, + "grad_norm": 6.236325263977051, + "learning_rate": 0.0001180255037813906, + "loss": 0.4932, + "step": 33 + }, + { + "epoch": 2.72, + "grad_norm": 
4.445058345794678, + "learning_rate": 0.0001120536680255323, + "loss": 0.1284, + "step": 34 + }, + { + "epoch": 2.8, + "grad_norm": 6.94170618057251, + "learning_rate": 0.00010603784974222861, + "loss": 0.1547, + "step": 35 + }, + { + "epoch": 2.88, + "grad_norm": 5.656033039093018, + "learning_rate": 0.0001, + "loss": 0.1973, + "step": 36 + }, + { + "epoch": 2.88, + "eval_loss": 0.5674905180931091, + "eval_runtime": 1.3681, + "eval_samples_per_second": 8.771, + "eval_steps_per_second": 4.386, + "step": 36 + }, + { + "epoch": 2.96, + "grad_norm": 18.19667625427246, + "learning_rate": 9.396215025777139e-05, + "loss": 0.4884, + "step": 37 + }, + { + "epoch": 3.04, + "grad_norm": 17.964893341064453, + "learning_rate": 8.79463319744677e-05, + "loss": 0.5526, + "step": 38 + }, + { + "epoch": 3.12, + "grad_norm": 5.015590190887451, + "learning_rate": 8.197449621860943e-05, + "loss": 0.2116, + "step": 39 + } + ], + "logging_steps": 1, + "max_steps": 62, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 13, + "total_flos": 3414704284237824.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/qlora-out/checkpoint-39/training_args.bin b/qlora-out/checkpoint-39/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1 --- /dev/null +++ b/qlora-out/checkpoint-39/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace +size 5624 diff --git a/qlora-out/checkpoint-52/README.md b/qlora-out/checkpoint-52/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28 --- /dev/null +++ b/qlora-out/checkpoint-52/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- 
**Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. 
(2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.10.1.dev0 \ No newline at end of file diff --git a/qlora-out/checkpoint-52/adapter_config.json b/qlora-out/checkpoint-52/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281 --- /dev/null +++ b/qlora-out/checkpoint-52/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "gate_proj", + "o_proj", + "down_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline 
at end of file diff --git a/qlora-out/checkpoint-52/adapter_model.safetensors b/qlora-out/checkpoint-52/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f3e3e9824bd4618085512b96aac10c77eb8c5ea --- /dev/null +++ b/qlora-out/checkpoint-52/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f543a16c2bbb11166292af99cbab42fa039c72766ce2da396aa279512c9d67 +size 335604696 diff --git a/qlora-out/checkpoint-52/optimizer.pt b/qlora-out/checkpoint-52/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc12ac5ed03f5f623348860fc82033908b7d6cdd --- /dev/null +++ b/qlora-out/checkpoint-52/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc05bf731a50997e7af44d91b701be1a9474180b446eef7cccd0a9bb6f49593f +size 168624724 diff --git a/qlora-out/checkpoint-52/rng_state.pth b/qlora-out/checkpoint-52/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9dc5f77bdc7f8a0a4a583cc7b6ea5bb09e3f0320 --- /dev/null +++ b/qlora-out/checkpoint-52/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d215519440d74cb3c2d938d0a6d0dcc602aa66ebc4017b44adae1cc4c34379e9 +size 14244 diff --git a/qlora-out/checkpoint-52/scheduler.pt b/qlora-out/checkpoint-52/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c05e4f67aa0cfdfc7b570266139876956d57a50b --- /dev/null +++ b/qlora-out/checkpoint-52/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210095055e4e9fa9a08e2ee8a6ef338aebf6d1d63c758470bd2537cf069290da +size 1064 diff --git a/qlora-out/checkpoint-52/trainer_state.json b/qlora-out/checkpoint-52/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..808479166153679b1687744fdeb4ab2771cc8f2f --- /dev/null +++ b/qlora-out/checkpoint-52/trainer_state.json @@ -0,0 +1,497 @@ +{ + "best_metric": 
0.19463467597961426, + "best_model_checkpoint": "./qlora-out/checkpoint-52", + "epoch": 4.16, + "eval_steps": 4, + "global_step": 52, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 102.28898620605469, + "learning_rate": 2e-05, + "loss": 6.6367, + "step": 1 + }, + { + "epoch": 0.08, + "eval_loss": 7.300913333892822, + "eval_runtime": 1.3523, + "eval_samples_per_second": 8.873, + "eval_steps_per_second": 4.437, + "step": 1 + }, + { + "epoch": 0.16, + "grad_norm": 103.4541015625, + "learning_rate": 4e-05, + "loss": 7.0616, + "step": 2 + }, + { + "epoch": 0.24, + "grad_norm": 67.47515869140625, + "learning_rate": 6e-05, + "loss": 4.686, + "step": 3 + }, + { + "epoch": 0.32, + "grad_norm": 72.36919403076172, + "learning_rate": 8e-05, + "loss": 2.3866, + "step": 4 + }, + { + "epoch": 0.32, + "eval_loss": 0.7137572169303894, + "eval_runtime": 1.3532, + "eval_samples_per_second": 8.868, + "eval_steps_per_second": 4.434, + "step": 4 + }, + { + "epoch": 0.4, + "grad_norm": 16.83085060119629, + "learning_rate": 0.0001, + "loss": 0.6844, + "step": 5 + }, + { + "epoch": 0.48, + "grad_norm": 25.897714614868164, + "learning_rate": 0.00012, + "loss": 0.914, + "step": 6 + }, + { + "epoch": 0.56, + "grad_norm": 18.89151382446289, + "learning_rate": 0.00014, + "loss": 0.63, + "step": 7 + }, + { + "epoch": 0.64, + "grad_norm": 27.15555763244629, + "learning_rate": 0.00016, + "loss": 0.948, + "step": 8 + }, + { + "epoch": 0.64, + "eval_loss": 1.0445994138717651, + "eval_runtime": 1.356, + "eval_samples_per_second": 8.85, + "eval_steps_per_second": 4.425, + "step": 8 + }, + { + "epoch": 0.72, + "grad_norm": 20.812381744384766, + "learning_rate": 0.00018, + "loss": 1.0285, + "step": 9 + }, + { + "epoch": 0.8, + "grad_norm": 56.3886604309082, + "learning_rate": 0.0002, + "loss": 1.3756, + "step": 10 + }, + { + "epoch": 0.88, + "grad_norm": 6.24803352355957, + "learning_rate": 
0.00019981755542233177, + "loss": 0.5178, + "step": 11 + }, + { + "epoch": 0.96, + "grad_norm": 8.379430770874023, + "learning_rate": 0.0001992708874098054, + "loss": 0.6822, + "step": 12 + }, + { + "epoch": 0.96, + "eval_loss": 1.3959709405899048, + "eval_runtime": 1.3583, + "eval_samples_per_second": 8.835, + "eval_steps_per_second": 4.417, + "step": 12 + }, + { + "epoch": 1.04, + "grad_norm": 20.744348526000977, + "learning_rate": 0.00019836199069471437, + "loss": 1.3762, + "step": 13 + }, + { + "epoch": 1.12, + "grad_norm": 4.800480842590332, + "learning_rate": 0.0001970941817426052, + "loss": 0.5248, + "step": 14 + }, + { + "epoch": 1.2, + "grad_norm": 11.284302711486816, + "learning_rate": 0.00019547208665085457, + "loss": 0.8094, + "step": 15 + }, + { + "epoch": 1.28, + "grad_norm": 5.787976264953613, + "learning_rate": 0.0001935016242685415, + "loss": 0.5222, + "step": 16 + }, + { + "epoch": 1.28, + "eval_loss": 0.9023411870002747, + "eval_runtime": 1.3623, + "eval_samples_per_second": 8.808, + "eval_steps_per_second": 4.404, + "step": 16 + }, + { + "epoch": 1.36, + "grad_norm": 21.48629379272461, + "learning_rate": 0.00019118998459920902, + "loss": 0.8027, + "step": 17 + }, + { + "epoch": 1.44, + "grad_norm": 38.0982666015625, + "learning_rate": 0.000188545602565321, + "loss": 1.7772, + "step": 18 + }, + { + "epoch": 1.52, + "grad_norm": 10.824837684631348, + "learning_rate": 0.00018557812723014476, + "loss": 0.7737, + "step": 19 + }, + { + "epoch": 1.6, + "grad_norm": 9.1353120803833, + "learning_rate": 0.00018229838658936564, + "loss": 0.534, + "step": 20 + }, + { + "epoch": 1.6, + "eval_loss": 0.4847445785999298, + "eval_runtime": 1.3637, + "eval_samples_per_second": 8.799, + "eval_steps_per_second": 4.4, + "step": 20 + }, + { + "epoch": 1.68, + "grad_norm": 3.8411033153533936, + "learning_rate": 0.00017871834806090501, + "loss": 0.3201, + "step": 21 + }, + { + "epoch": 1.76, + "grad_norm": 23.888507843017578, + "learning_rate": 0.00017485107481711012, 
+ "loss": 2.2541, + "step": 22 + }, + { + "epoch": 1.84, + "grad_norm": 8.5956392288208, + "learning_rate": 0.00017071067811865476, + "loss": 0.8177, + "step": 23 + }, + { + "epoch": 1.92, + "grad_norm": 3.825141191482544, + "learning_rate": 0.00016631226582407952, + "loss": 0.4624, + "step": 24 + }, + { + "epoch": 1.92, + "eval_loss": 0.5740255117416382, + "eval_runtime": 1.3655, + "eval_samples_per_second": 8.788, + "eval_steps_per_second": 4.394, + "step": 24 + }, + { + "epoch": 2.0, + "grad_norm": 3.558993101119995, + "learning_rate": 0.00016167188726285434, + "loss": 0.3714, + "step": 25 + }, + { + "epoch": 2.08, + "grad_norm": 11.759211540222168, + "learning_rate": 0.00015680647467311557, + "loss": 0.6562, + "step": 26 + }, + { + "epoch": 2.16, + "grad_norm": 96.2179183959961, + "learning_rate": 0.00015173378141776568, + "loss": 1.5141, + "step": 27 + }, + { + "epoch": 2.24, + "grad_norm": 31.022045135498047, + "learning_rate": 0.00014647231720437686, + "loss": 0.7753, + "step": 28 + }, + { + "epoch": 2.24, + "eval_loss": 0.3771994113922119, + "eval_runtime": 1.3676, + "eval_samples_per_second": 8.775, + "eval_steps_per_second": 4.387, + "step": 28 + }, + { + "epoch": 2.32, + "grad_norm": 3.5004501342773438, + "learning_rate": 0.0001410412805452757, + "loss": 0.2649, + "step": 29 + }, + { + "epoch": 2.4, + "grad_norm": 5.16464376449585, + "learning_rate": 0.00013546048870425356, + "loss": 0.171, + "step": 30 + }, + { + "epoch": 2.48, + "grad_norm": 25.634010314941406, + "learning_rate": 0.00012975030538552032, + "loss": 0.9172, + "step": 31 + }, + { + "epoch": 2.56, + "grad_norm": 7.102908134460449, + "learning_rate": 0.0001239315664287558, + "loss": 0.3324, + "step": 32 + }, + { + "epoch": 2.56, + "eval_loss": 0.29374203085899353, + "eval_runtime": 1.3678, + "eval_samples_per_second": 8.773, + "eval_steps_per_second": 4.387, + "step": 32 + }, + { + "epoch": 2.64, + "grad_norm": 6.236325263977051, + "learning_rate": 0.0001180255037813906, + "loss": 0.4932, + 
"step": 33 + }, + { + "epoch": 2.72, + "grad_norm": 4.445058345794678, + "learning_rate": 0.0001120536680255323, + "loss": 0.1284, + "step": 34 + }, + { + "epoch": 2.8, + "grad_norm": 6.94170618057251, + "learning_rate": 0.00010603784974222861, + "loss": 0.1547, + "step": 35 + }, + { + "epoch": 2.88, + "grad_norm": 5.656033039093018, + "learning_rate": 0.0001, + "loss": 0.1973, + "step": 36 + }, + { + "epoch": 2.88, + "eval_loss": 0.5674905180931091, + "eval_runtime": 1.3681, + "eval_samples_per_second": 8.771, + "eval_steps_per_second": 4.386, + "step": 36 + }, + { + "epoch": 2.96, + "grad_norm": 18.19667625427246, + "learning_rate": 9.396215025777139e-05, + "loss": 0.4884, + "step": 37 + }, + { + "epoch": 3.04, + "grad_norm": 17.964893341064453, + "learning_rate": 8.79463319744677e-05, + "loss": 0.5526, + "step": 38 + }, + { + "epoch": 3.12, + "grad_norm": 5.015590190887451, + "learning_rate": 8.197449621860943e-05, + "loss": 0.2116, + "step": 39 + }, + { + "epoch": 3.2, + "grad_norm": 5.6883225440979, + "learning_rate": 7.606843357124426e-05, + "loss": 0.0843, + "step": 40 + }, + { + "epoch": 3.2, + "eval_loss": 0.2360386848449707, + "eval_runtime": 1.3667, + "eval_samples_per_second": 8.78, + "eval_steps_per_second": 4.39, + "step": 40 + }, + { + "epoch": 3.28, + "grad_norm": 6.636446475982666, + "learning_rate": 7.024969461447972e-05, + "loss": 0.1158, + "step": 41 + }, + { + "epoch": 3.36, + "grad_norm": 4.405576229095459, + "learning_rate": 6.453951129574644e-05, + "loss": 0.2755, + "step": 42 + }, + { + "epoch": 3.44, + "grad_norm": 1.6179524660110474, + "learning_rate": 5.8958719454724346e-05, + "loss": 0.0186, + "step": 43 + }, + { + "epoch": 3.52, + "grad_norm": 8.783114433288574, + "learning_rate": 5.3527682795623146e-05, + "loss": 0.3836, + "step": 44 + }, + { + "epoch": 3.52, + "eval_loss": 0.13969357311725616, + "eval_runtime": 1.3687, + "eval_samples_per_second": 8.767, + "eval_steps_per_second": 4.384, + "step": 44 + }, + { + "epoch": 3.6, + 
"grad_norm": 0.8835445046424866, + "learning_rate": 4.826621858223431e-05, + "loss": 0.0141, + "step": 45 + }, + { + "epoch": 3.68, + "grad_norm": 12.678099632263184, + "learning_rate": 4.3193525326884435e-05, + "loss": 0.6196, + "step": 46 + }, + { + "epoch": 3.76, + "grad_norm": 5.320870876312256, + "learning_rate": 3.832811273714569e-05, + "loss": 0.0948, + "step": 47 + }, + { + "epoch": 3.84, + "grad_norm": 2.7501108646392822, + "learning_rate": 3.36877341759205e-05, + "loss": 0.0449, + "step": 48 + }, + { + "epoch": 3.84, + "eval_loss": 0.2801015079021454, + "eval_runtime": 1.3706, + "eval_samples_per_second": 8.755, + "eval_steps_per_second": 4.378, + "step": 48 + }, + { + "epoch": 3.92, + "grad_norm": 4.41072940826416, + "learning_rate": 2.9289321881345254e-05, + "loss": 0.3026, + "step": 49 + }, + { + "epoch": 4.0, + "grad_norm": 1.2105910778045654, + "learning_rate": 2.514892518288988e-05, + "loss": 0.0152, + "step": 50 + }, + { + "epoch": 4.08, + "grad_norm": 4.502895355224609, + "learning_rate": 2.1281651939094992e-05, + "loss": 0.0629, + "step": 51 + }, + { + "epoch": 4.16, + "grad_norm": 6.058006286621094, + "learning_rate": 1.7701613410634365e-05, + "loss": 0.2246, + "step": 52 + }, + { + "epoch": 4.16, + "eval_loss": 0.19463467597961426, + "eval_runtime": 1.3725, + "eval_samples_per_second": 8.743, + "eval_steps_per_second": 4.372, + "step": 52 + } + ], + "logging_steps": 1, + "max_steps": 62, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 13, + "total_flos": 4552939045650432.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/qlora-out/checkpoint-52/training_args.bin b/qlora-out/checkpoint-52/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1 --- /dev/null +++ b/qlora-out/checkpoint-52/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace +size 5624 diff --git a/qlora-out/config.json b/qlora-out/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1e39741fb665ec4711bd92a0c67178c983710d9 --- /dev/null +++ b/qlora-out/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "mistralai/Mistral-7B-v0.1", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mistral", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "quantization_config": { + "_load_in_4bit": true, + "_load_in_8bit": false, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": true, + "load_in_8bit": false, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.39.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network/events.out.tfevents.1712647777.gpu06.pri.dmog.alces.network.30736.0 b/qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network/events.out.tfevents.1712647777.gpu06.pri.dmog.alces.network.30736.0 new file mode 100644 index 0000000000000000000000000000000000000000..6ba64c8069ced15f613f5a512ae2fad7aae6ded9 --- /dev/null +++ b/qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network/events.out.tfevents.1712647777.gpu06.pri.dmog.alces.network.30736.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b916e565a77dcb7d5bd53aba6f367407f84d56fd38e46a20f33d8b05d82f6ec7 +size 23212 diff --git a/qlora-out/special_tokens_map.json b/qlora-out/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/qlora-out/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/qlora-out/tokenizer.model b/qlora-out/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/qlora-out/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/qlora-out/tokenizer_config.json b/qlora-out/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..44ceae3369b580af560afc8670fe5db6f3296960 --- /dev/null +++ b/qlora-out/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": 
"", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +}