Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- config.yaml +79 -0
- dmog/axolotl-test-outputs/test.output +5 -0
- dmog/job.error +162 -0
- dmog/job.output +131 -0
- finetune-test.py +72 -0
- last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow +3 -0
- last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json +22 -0
- last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json +16 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml +15 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch +79 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss +16 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime +16 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second +16 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second +16 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm +62 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate +62 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss +62 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1 +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2 +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16 +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory +1 -0
- mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor +1 -0
config.yaml
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
base_model: mistralai/Mistral-7B-v0.1
|
| 2 |
+
model_type: MistralForCausalLM
|
| 3 |
+
tokenizer_type: LlamaTokenizer
|
| 4 |
+
|
| 5 |
+
load_in_8bit: false
|
| 6 |
+
load_in_4bit: true
|
| 7 |
+
strict: false
|
| 8 |
+
|
| 9 |
+
datasets:
|
| 10 |
+
- path: caffeinatedcherrychic/cidds-agg-balanced
|
| 11 |
+
type: alpaca
|
| 12 |
+
dataset_prepared_path: last_run_prepared
|
| 13 |
+
val_set_size: 0.1
|
| 14 |
+
output_dir: ./qlora-out
|
| 15 |
+
|
| 16 |
+
adapter: qlora
|
| 17 |
+
lora_model_dir:
|
| 18 |
+
|
| 19 |
+
sequence_len: 256
|
| 20 |
+
sample_packing: false
|
| 21 |
+
pad_to_sequence_len: true
|
| 22 |
+
|
| 23 |
+
lora_r: 32
|
| 24 |
+
lora_alpha: 64
|
| 25 |
+
lora_dropout: 0.05
|
| 26 |
+
lora_target_linear: true
|
| 27 |
+
lora_fan_in_fan_out:
|
| 28 |
+
lora_target_modules:
|
| 29 |
+
- gate_proj
|
| 30 |
+
- down_proj
|
| 31 |
+
- up_proj
|
| 32 |
+
- q_proj
|
| 33 |
+
- v_proj
|
| 34 |
+
- k_proj
|
| 35 |
+
- o_proj
|
| 36 |
+
|
| 37 |
+
wandb_project:
|
| 38 |
+
wandb_entity:
|
| 39 |
+
wandb_watch:
|
| 40 |
+
wandb_name:
|
| 41 |
+
wandb_log_model:
|
| 42 |
+
|
| 43 |
+
gradient_accumulation_steps: 4
|
| 44 |
+
micro_batch_size: 2
|
| 45 |
+
num_epochs: 5
|
| 46 |
+
optimizer: adamw_bnb_8bit
|
| 47 |
+
lr_scheduler: cosine
|
| 48 |
+
learning_rate: 0.0002
|
| 49 |
+
|
| 50 |
+
train_on_inputs: false
|
| 51 |
+
group_by_length: false
|
| 52 |
+
bf16: true
|
| 53 |
+
fp16: false
|
| 54 |
+
tf32: false
|
| 55 |
+
|
| 56 |
+
gradient_checkpointing: true
|
| 57 |
+
early_stopping_patience:
|
| 58 |
+
resume_from_checkpoint:
|
| 59 |
+
local_rank:
|
| 60 |
+
logging_steps: 1
|
| 61 |
+
xformers_attention:
|
| 62 |
+
flash_attention: true
|
| 63 |
+
|
| 64 |
+
loss_watchdog_threshold: 5.0
|
| 65 |
+
loss_watchdog_patience: 3
|
| 66 |
+
|
| 67 |
+
max_steps: 500
|
| 68 |
+
warmup_steps: 10
|
| 69 |
+
evals_per_epoch: 4
|
| 70 |
+
eval_table_size:
|
| 71 |
+
eval_max_new_tokens: 1
|
| 72 |
+
saves_per_epoch: 1
|
| 73 |
+
debug:
|
| 74 |
+
deepspeed:
|
| 75 |
+
weight_decay: 0.001
|
| 76 |
+
fsdp:
|
| 77 |
+
fsdp_config:
|
| 78 |
+
special_tokens:
|
| 79 |
+
|
dmog/axolotl-test-outputs/test.output
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Hello, dhruti
|
| 2 |
+
#######
|
| 3 |
+
Finetuning
|
| 4 |
+
/mnt/scratch/users/dhd2000/ft14
|
| 5 |
+
#######
|
dmog/job.error
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/62 [00:00<?, ?it/s]
|
| 1 |
2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
|
| 2 |
|
| 3 |
2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
|
|
|
|
| 4 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 5 |
33%|███▎ | 2/6 [00:00<00:00, 8.95it/s]
|
|
|
|
| 6 |
50%|█████ | 3/6 [00:00<00:00, 6.32it/s]
|
|
|
|
| 7 |
67%|██████▋ | 4/6 [00:00<00:00, 5.47it/s]
|
|
|
|
| 8 |
83%|████████▎ | 5/6 [00:00<00:00, 5.07it/s]
|
|
|
|
| 9 |
|
|
|
|
| 10 |
|
| 11 |
2%|▏ | 1/62 [00:04<03:15, 3.20s/it]
|
|
|
|
|
|
|
| 12 |
|
| 13 |
3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
|
| 14 |
|
| 15 |
3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
|
| 16 |
5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
|
| 17 |
|
| 18 |
5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
|
| 19 |
6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
|
| 20 |
|
| 21 |
6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
|
|
|
|
| 22 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 23 |
33%|███▎ | 2/6 [00:00<00:00, 8.91it/s]
|
|
|
|
| 24 |
50%|█████ | 3/6 [00:00<00:00, 6.29it/s]
|
|
|
|
| 25 |
67%|██████▋ | 4/6 [00:00<00:00, 5.45it/s]
|
|
|
|
| 26 |
83%|████████▎ | 5/6 [00:00<00:00, 5.06it/s]
|
|
|
|
| 27 |
|
|
|
|
| 28 |
|
| 29 |
6%|▋ | 4/62 [00:14<03:05, 3.20s/it]
|
|
|
|
|
|
|
| 30 |
|
| 31 |
8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
|
| 32 |
|
| 33 |
8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
|
| 34 |
10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
|
| 35 |
|
| 36 |
10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
|
| 37 |
11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
|
| 38 |
|
| 39 |
11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
|
| 40 |
13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
|
| 41 |
|
| 42 |
13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
|
|
|
|
| 43 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 44 |
33%|███▎ | 2/6 [00:00<00:00, 8.92it/s]
|
|
|
|
| 45 |
50%|█████ | 3/6 [00:00<00:00, 6.28it/s]
|
|
|
|
| 46 |
67%|██████▋ | 4/6 [00:00<00:00, 5.44it/s]
|
|
|
|
| 47 |
83%|████████▎ | 5/6 [00:00<00:00, 5.04it/s]
|
|
|
|
| 48 |
|
|
|
|
| 49 |
|
| 50 |
13%|█▎ | 8/62 [00:27<02:47, 3.10s/it]
|
|
|
|
|
|
|
| 51 |
|
| 52 |
15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
|
| 53 |
|
| 54 |
15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
|
| 55 |
16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
|
| 56 |
|
| 57 |
16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
|
| 58 |
18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
|
| 59 |
|
| 60 |
18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
|
| 61 |
19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
|
| 62 |
|
| 63 |
19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
|
|
|
|
| 64 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 65 |
33%|███▎ | 2/6 [00:00<00:00, 8.89it/s]
|
|
|
|
| 66 |
50%|█████ | 3/6 [00:00<00:00, 6.27it/s]
|
|
|
|
| 67 |
67%|██████▋ | 4/6 [00:00<00:00, 5.43it/s]
|
|
|
|
| 68 |
83%|████████▎ | 5/6 [00:00<00:00, 5.03it/s]
|
|
|
|
| 69 |
|
|
|
|
| 70 |
|
| 71 |
19%|█▉ | 12/62 [00:40<02:34, 3.09s/it]
|
|
|
|
|
|
|
| 72 |
|
| 73 |
21%|██ | 13/62 [00:43<02:46, 3.40s/it]
|
| 74 |
|
| 75 |
21%|██ | 13/62 [00:43<02:46, 3.40s/it]
|
| 76 |
23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
|
| 77 |
|
| 78 |
23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
|
| 79 |
24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
|
| 80 |
|
| 81 |
24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
|
| 82 |
26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
|
| 83 |
|
| 84 |
26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
|
|
|
|
| 85 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 86 |
33%|███▎ | 2/6 [00:00<00:00, 8.86it/s]
|
|
|
|
| 87 |
50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
|
|
|
|
| 88 |
67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
|
|
|
|
| 89 |
83%|████████▎ | 5/6 [00:00<00:00, 5.02it/s]
|
|
|
|
| 90 |
|
|
|
|
| 91 |
|
| 92 |
26%|██▌ | 16/62 [00:54<02:27, 3.21s/it]
|
|
|
|
|
|
|
| 93 |
|
| 94 |
27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
|
| 95 |
|
| 96 |
27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
|
| 97 |
29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
|
| 98 |
|
| 99 |
29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
|
| 100 |
31%|███ | 19/62 [01:03<02:18, 3.22s/it]
|
| 101 |
|
| 102 |
31%|███ | 19/62 [01:03<02:18, 3.22s/it]
|
| 103 |
32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
|
| 104 |
|
| 105 |
32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
|
|
|
|
| 106 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 107 |
33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
|
|
|
|
| 108 |
50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
|
|
|
|
| 109 |
67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
|
|
|
|
| 110 |
83%|████████▎ | 5/6 [00:00<00:00, 5.01it/s]
|
|
|
|
| 111 |
|
|
|
|
| 112 |
|
| 113 |
32%|███▏ | 20/62 [01:07<02:11, 3.13s/it]
|
|
|
|
|
|
|
| 114 |
|
| 115 |
34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
|
| 116 |
|
| 117 |
34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
|
| 118 |
35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
|
| 119 |
|
| 120 |
35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
|
| 121 |
37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
|
| 122 |
|
| 123 |
37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
|
| 124 |
39%|███▊ | 24/62 [01:18<01:58, 3.11s/it]
|
| 125 |
|
| 126 |
39%|███▊ | 24/62 [01:19<01:58, 3.11s/it]
|
|
|
|
| 127 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 128 |
33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
|
|
|
|
| 129 |
50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
|
|
|
|
| 130 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
|
|
|
| 131 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
|
|
|
| 132 |
|
|
|
|
| 133 |
|
| 134 |
39%|███▊ | 24/62 [01:20<01:58, 3.11s/it]
|
|
|
|
|
|
|
| 135 |
|
| 136 |
40%|████ | 25/62 [01:23<02:06, 3.42s/it]
|
| 137 |
|
| 138 |
40%|████ | 25/62 [01:23<02:06, 3.42s/it]
|
| 139 |
42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
|
| 140 |
|
| 141 |
42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
|
| 142 |
44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
|
| 143 |
|
| 144 |
44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
|
| 145 |
45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
|
| 146 |
|
| 147 |
45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
|
|
|
|
| 148 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 149 |
33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
|
|
|
|
| 150 |
50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
|
|
|
|
| 151 |
67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
|
|
|
|
| 152 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
|
|
|
| 153 |
|
|
|
|
| 154 |
|
| 155 |
45%|████▌ | 28/62 [01:34<01:50, 3.25s/it]
|
|
|
|
|
|
|
| 156 |
|
| 157 |
47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
|
| 158 |
|
| 159 |
47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
|
| 160 |
48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
|
| 161 |
|
| 162 |
48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
|
| 163 |
50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
|
| 164 |
|
| 165 |
50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
|
| 166 |
52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
|
| 167 |
|
| 168 |
52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
|
|
|
|
| 169 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 170 |
33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
|
|
|
|
| 171 |
50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
|
|
|
|
| 172 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
|
|
|
| 173 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
|
|
|
| 174 |
|
|
|
|
| 175 |
|
| 176 |
52%|█████▏ | 32/62 [01:47<01:34, 3.14s/it]
|
|
|
|
|
|
|
| 177 |
|
| 178 |
53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
|
| 179 |
|
| 180 |
53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
|
| 181 |
55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
|
| 182 |
|
| 183 |
55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
|
| 184 |
56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
|
| 185 |
|
| 186 |
56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
|
| 187 |
58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
|
| 188 |
|
| 189 |
58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
|
|
|
|
| 190 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 191 |
33%|███▎ | 2/6 [00:00<00:00, 8.82it/s]
|
|
|
|
| 192 |
50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
|
|
|
|
| 193 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
|
|
|
| 194 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
|
|
|
| 195 |
|
|
|
|
| 196 |
|
| 197 |
58%|█████▊ | 36/62 [02:00<01:20, 3.11s/it]
|
|
|
|
|
|
|
| 198 |
|
| 199 |
60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
|
| 200 |
|
| 201 |
60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
|
| 202 |
61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
|
| 203 |
|
| 204 |
61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
|
| 205 |
63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
|
| 206 |
|
| 207 |
63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
|
| 208 |
65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
|
| 209 |
|
| 210 |
65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
|
|
|
|
| 211 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 212 |
33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
|
|
|
|
| 213 |
50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
|
|
|
|
| 214 |
67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
|
|
|
|
| 215 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
|
|
|
| 216 |
|
|
|
|
| 217 |
|
| 218 |
65%|██████▍ | 40/62 [02:13<01:12, 3.32s/it]
|
|
|
|
|
|
|
| 219 |
|
| 220 |
66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
|
| 221 |
|
| 222 |
66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
|
| 223 |
68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
|
| 224 |
|
| 225 |
68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
|
| 226 |
69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
|
| 227 |
|
| 228 |
69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
|
| 229 |
71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
|
| 230 |
|
| 231 |
71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
|
|
|
|
| 232 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 233 |
33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
|
|
|
|
| 234 |
50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
|
|
|
|
| 235 |
67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
|
|
|
|
| 236 |
83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
|
|
|
|
| 237 |
|
|
|
|
| 238 |
|
| 239 |
71%|███████ | 44/62 [02:26<00:56, 3.16s/it]
|
|
|
|
|
|
|
| 240 |
|
| 241 |
73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
|
| 242 |
|
| 243 |
73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
|
| 244 |
74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
|
| 245 |
|
| 246 |
74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
|
| 247 |
76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
|
| 248 |
|
| 249 |
76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
|
| 250 |
77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
|
| 251 |
|
| 252 |
77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
|
|
|
|
| 253 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 254 |
33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
|
|
|
|
| 255 |
50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
|
|
|
|
| 256 |
67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
|
|
|
|
| 257 |
83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
|
|
|
|
| 258 |
|
|
|
|
| 259 |
|
| 260 |
77%|███████▋ | 48/62 [02:39<00:43, 3.12s/it]
|
|
|
|
|
|
|
| 261 |
|
| 262 |
79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
|
| 263 |
|
| 264 |
79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
|
| 265 |
81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
|
| 266 |
|
| 267 |
81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
|
| 268 |
82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
|
| 269 |
|
| 270 |
82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
|
| 271 |
84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
|
| 272 |
|
| 273 |
84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
|
|
|
|
| 274 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 275 |
33%|███▎ | 2/6 [00:00<00:00, 8.78it/s]
|
|
|
|
| 276 |
50%|█████ | 3/6 [00:00<00:00, 6.18it/s]
|
|
|
|
| 277 |
67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
|
|
|
|
| 278 |
83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
|
|
|
|
| 279 |
|
|
|
|
| 280 |
|
| 281 |
84%|████████▍ | 52/62 [02:52<00:30, 3.09s/it]
|
|
|
|
|
|
|
| 282 |
|
| 283 |
85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
|
| 284 |
|
| 285 |
85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
|
| 286 |
87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
|
| 287 |
|
| 288 |
87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
|
| 289 |
89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
|
| 290 |
|
| 291 |
89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
|
| 292 |
90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
|
| 293 |
|
| 294 |
90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
|
|
|
|
| 295 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 296 |
33%|███▎ | 2/6 [00:00<00:00, 8.79it/s]
|
|
|
|
| 297 |
50%|█████ | 3/6 [00:00<00:00, 6.19it/s]
|
|
|
|
| 298 |
67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
|
|
|
|
| 299 |
83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
|
|
|
|
| 300 |
|
|
|
|
| 301 |
|
| 302 |
90%|█████████ | 56/62 [03:06<00:19, 3.20s/it]
|
|
|
|
|
|
|
| 303 |
|
| 304 |
92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
|
| 305 |
|
| 306 |
92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
|
| 307 |
94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
|
| 308 |
|
| 309 |
94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
|
| 310 |
95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
|
| 311 |
|
| 312 |
95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
|
| 313 |
97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
|
| 314 |
|
| 315 |
97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
|
|
|
|
| 316 |
0%| | 0/6 [00:00<?, ?it/s]
|
|
|
|
| 317 |
33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
|
|
|
|
| 318 |
50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
|
|
|
|
| 319 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
|
|
|
| 320 |
83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
|
|
|
|
| 321 |
|
|
|
|
| 322 |
|
| 323 |
97%|█████████▋| 60/62 [03:19<00:06, 3.13s/it]
|
|
|
|
|
|
|
| 324 |
|
| 325 |
98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
|
| 326 |
|
| 327 |
98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
|
| 328 |
|
| 329 |
|
|
|
|
| 1 |
+
[38;5;127mmpi[0m/[38;5;172mopenmpi[0m/[38;5;67m4.1.5[0m/[38;5;68mgcc-4.8.5[0m
|
| 2 |
+
| -- [38;5;127mlibs[0m/[38;5;172mgcc[0m/[38;5;67msystem[0m
|
| 3 |
+
| * --> [0;32mOK[0m
|
| 4 |
+
|
|
| 5 |
+
[0;32mOK[0m
|
| 6 |
+
[38;5;127mmpi[0m/[38;5;172mopenmpi[0m/[38;5;67m4.1.5[0m/[38;5;68mgcc-4.8.5[0m ... UNLOADING --> [0;32mOK[0m
|
| 7 |
+
[38;5;127mlibs[0m/[38;5;172mgcc[0m/[38;5;67msystem[0m ... UNLOADING --> [0;32mOK[0m
|
| 8 |
+
The following values were not passed to `accelerate launch` and had defaults used instead:
|
| 9 |
+
`--num_processes` was set to a value of `1`
|
| 10 |
+
`--num_machines` was set to a value of `1`
|
| 11 |
+
`--mixed_precision` was set to a value of `'no'`
|
| 12 |
+
`--dynamo_backend` was set to a value of `'no'`
|
| 13 |
+
To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
0%| | 0/62 [00:00<?, ?it/s]
|
| 20 |
2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
|
| 21 |
|
| 22 |
2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
|
| 23 |
+
|
| 24 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 25 |
+
|
| 26 |
33%|███▎ | 2/6 [00:00<00:00, 8.95it/s]
|
| 27 |
+
|
| 28 |
50%|█████ | 3/6 [00:00<00:00, 6.32it/s]
|
| 29 |
+
|
| 30 |
67%|██████▋ | 4/6 [00:00<00:00, 5.47it/s]
|
| 31 |
+
|
| 32 |
83%|████████▎ | 5/6 [00:00<00:00, 5.07it/s]
|
| 33 |
+
|
| 34 |
|
| 35 |
+
|
| 36 |
|
| 37 |
2%|▏ | 1/62 [00:04<03:15, 3.20s/it]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
|
| 41 |
3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
|
| 42 |
|
| 43 |
3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
|
| 44 |
5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
|
| 45 |
|
| 46 |
5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
|
| 47 |
6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
|
| 48 |
|
| 49 |
6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
|
| 50 |
+
|
| 51 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 52 |
+
|
| 53 |
33%|███▎ | 2/6 [00:00<00:00, 8.91it/s]
|
| 54 |
+
|
| 55 |
50%|█████ | 3/6 [00:00<00:00, 6.29it/s]
|
| 56 |
+
|
| 57 |
67%|██████▋ | 4/6 [00:00<00:00, 5.45it/s]
|
| 58 |
+
|
| 59 |
83%|████████▎ | 5/6 [00:00<00:00, 5.06it/s]
|
| 60 |
+
|
| 61 |
|
| 62 |
+
|
| 63 |
|
| 64 |
6%|▋ | 4/62 [00:14<03:05, 3.20s/it]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
|
| 68 |
8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
|
| 69 |
|
| 70 |
8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
|
| 71 |
10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
|
| 72 |
|
| 73 |
10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
|
| 74 |
11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
|
| 75 |
|
| 76 |
11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
|
| 77 |
13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
|
| 78 |
|
| 79 |
13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
|
| 80 |
+
|
| 81 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 82 |
+
|
| 83 |
33%|███▎ | 2/6 [00:00<00:00, 8.92it/s]
|
| 84 |
+
|
| 85 |
50%|█████ | 3/6 [00:00<00:00, 6.28it/s]
|
| 86 |
+
|
| 87 |
67%|██████▋ | 4/6 [00:00<00:00, 5.44it/s]
|
| 88 |
+
|
| 89 |
83%|████████▎ | 5/6 [00:00<00:00, 5.04it/s]
|
| 90 |
+
|
| 91 |
|
| 92 |
+
|
| 93 |
|
| 94 |
13%|█▎ | 8/62 [00:27<02:47, 3.10s/it]
|
| 95 |
+
|
| 96 |
+
|
| 97 |
|
| 98 |
15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
|
| 99 |
|
| 100 |
15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
|
| 101 |
16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
|
| 102 |
|
| 103 |
16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
|
| 104 |
18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
|
| 105 |
|
| 106 |
18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
|
| 107 |
19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
|
| 108 |
|
| 109 |
19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
|
| 110 |
+
|
| 111 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 112 |
+
|
| 113 |
33%|███▎ | 2/6 [00:00<00:00, 8.89it/s]
|
| 114 |
+
|
| 115 |
50%|█████ | 3/6 [00:00<00:00, 6.27it/s]
|
| 116 |
+
|
| 117 |
67%|██████▋ | 4/6 [00:00<00:00, 5.43it/s]
|
| 118 |
+
|
| 119 |
83%|████████▎ | 5/6 [00:00<00:00, 5.03it/s]
|
| 120 |
+
|
| 121 |
|
| 122 |
+
|
| 123 |
|
| 124 |
19%|█▉ | 12/62 [00:40<02:34, 3.09s/it]
|
| 125 |
+
|
| 126 |
+
|
| 127 |
|
| 128 |
21%|██ | 13/62 [00:43<02:46, 3.40s/it]
|
| 129 |
|
| 130 |
21%|██ | 13/62 [00:43<02:46, 3.40s/it]
|
| 131 |
23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
|
| 132 |
|
| 133 |
23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
|
| 134 |
24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
|
| 135 |
|
| 136 |
24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
|
| 137 |
26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
|
| 138 |
|
| 139 |
26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
|
| 140 |
+
|
| 141 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 142 |
+
|
| 143 |
33%|███▎ | 2/6 [00:00<00:00, 8.86it/s]
|
| 144 |
+
|
| 145 |
50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
|
| 146 |
+
|
| 147 |
67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
|
| 148 |
+
|
| 149 |
83%|████████▎ | 5/6 [00:00<00:00, 5.02it/s]
|
| 150 |
+
|
| 151 |
|
| 152 |
+
|
| 153 |
|
| 154 |
26%|██▌ | 16/62 [00:54<02:27, 3.21s/it]
|
| 155 |
+
|
| 156 |
+
|
| 157 |
|
| 158 |
27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
|
| 159 |
|
| 160 |
27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
|
| 161 |
29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
|
| 162 |
|
| 163 |
29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
|
| 164 |
31%|███ | 19/62 [01:03<02:18, 3.22s/it]
|
| 165 |
|
| 166 |
31%|███ | 19/62 [01:03<02:18, 3.22s/it]
|
| 167 |
32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
|
| 168 |
|
| 169 |
32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
|
| 170 |
+
|
| 171 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 172 |
+
|
| 173 |
33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
|
| 174 |
+
|
| 175 |
50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
|
| 176 |
+
|
| 177 |
67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
|
| 178 |
+
|
| 179 |
83%|████████▎ | 5/6 [00:00<00:00, 5.01it/s]
|
| 180 |
+
|
| 181 |
|
| 182 |
+
|
| 183 |
|
| 184 |
32%|███▏ | 20/62 [01:07<02:11, 3.13s/it]
|
| 185 |
+
|
| 186 |
+
|
| 187 |
|
| 188 |
34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
|
| 189 |
|
| 190 |
34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
|
| 191 |
35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
|
| 192 |
|
| 193 |
35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
|
| 194 |
37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
|
| 195 |
|
| 196 |
37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
|
| 197 |
39%|███▊ | 24/62 [01:18<01:58, 3.11s/it]
|
| 198 |
|
| 199 |
39%|███▊ | 24/62 [01:19<01:58, 3.11s/it]
|
| 200 |
+
|
| 201 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 202 |
+
|
| 203 |
33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
|
| 204 |
+
|
| 205 |
50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
|
| 206 |
+
|
| 207 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
| 208 |
+
|
| 209 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
| 210 |
+
|
| 211 |
|
| 212 |
+
|
| 213 |
|
| 214 |
39%|███▊ | 24/62 [01:20<01:58, 3.11s/it]
|
| 215 |
+
|
| 216 |
+
|
| 217 |
|
| 218 |
40%|████ | 25/62 [01:23<02:06, 3.42s/it]
|
| 219 |
|
| 220 |
40%|████ | 25/62 [01:23<02:06, 3.42s/it]
|
| 221 |
42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
|
| 222 |
|
| 223 |
42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
|
| 224 |
44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
|
| 225 |
|
| 226 |
44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
|
| 227 |
45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
|
| 228 |
|
| 229 |
45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
|
| 230 |
+
|
| 231 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 232 |
+
|
| 233 |
33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
|
| 234 |
+
|
| 235 |
50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
|
| 236 |
+
|
| 237 |
67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
|
| 238 |
+
|
| 239 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
| 240 |
+
|
| 241 |
|
| 242 |
+
|
| 243 |
|
| 244 |
45%|████▌ | 28/62 [01:34<01:50, 3.25s/it]
|
| 245 |
+
|
| 246 |
+
|
| 247 |
|
| 248 |
47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
|
| 249 |
|
| 250 |
47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
|
| 251 |
48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
|
| 252 |
|
| 253 |
48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
|
| 254 |
50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
|
| 255 |
|
| 256 |
50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
|
| 257 |
52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
|
| 258 |
|
| 259 |
52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
|
| 260 |
+
|
| 261 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 262 |
+
|
| 263 |
33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
|
| 264 |
+
|
| 265 |
50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
|
| 266 |
+
|
| 267 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
| 268 |
+
|
| 269 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
| 270 |
+
|
| 271 |
|
| 272 |
+
|
| 273 |
|
| 274 |
52%|█████▏ | 32/62 [01:47<01:34, 3.14s/it]
|
| 275 |
+
|
| 276 |
+
|
| 277 |
|
| 278 |
53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
|
| 279 |
|
| 280 |
53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
|
| 281 |
55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
|
| 282 |
|
| 283 |
55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
|
| 284 |
56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
|
| 285 |
|
| 286 |
56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
|
| 287 |
58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
|
| 288 |
|
| 289 |
58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
|
| 290 |
+
|
| 291 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 292 |
+
|
| 293 |
33%|███▎ | 2/6 [00:00<00:00, 8.82it/s]
|
| 294 |
+
|
| 295 |
50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
|
| 296 |
+
|
| 297 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
| 298 |
+
|
| 299 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
| 300 |
+
|
| 301 |
|
| 302 |
+
|
| 303 |
|
| 304 |
58%|█████▊ | 36/62 [02:00<01:20, 3.11s/it]
|
| 305 |
+
|
| 306 |
+
|
| 307 |
|
| 308 |
60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
|
| 309 |
|
| 310 |
60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
|
| 311 |
61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
|
| 312 |
|
| 313 |
61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
|
| 314 |
63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
|
| 315 |
|
| 316 |
63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
|
| 317 |
65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
|
| 318 |
|
| 319 |
65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
|
| 320 |
+
|
| 321 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 322 |
+
|
| 323 |
33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
|
| 324 |
+
|
| 325 |
50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
|
| 326 |
+
|
| 327 |
67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
|
| 328 |
+
|
| 329 |
83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
|
| 330 |
+
|
| 331 |
|
| 332 |
+
|
| 333 |
|
| 334 |
65%|██████▍ | 40/62 [02:13<01:12, 3.32s/it]
|
| 335 |
+
|
| 336 |
+
|
| 337 |
|
| 338 |
66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
|
| 339 |
|
| 340 |
66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
|
| 341 |
68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
|
| 342 |
|
| 343 |
68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
|
| 344 |
69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
|
| 345 |
|
| 346 |
69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
|
| 347 |
71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
|
| 348 |
|
| 349 |
71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
|
| 350 |
+
|
| 351 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 352 |
+
|
| 353 |
33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
|
| 354 |
+
|
| 355 |
50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
|
| 356 |
+
|
| 357 |
67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
|
| 358 |
+
|
| 359 |
83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
|
| 360 |
+
|
| 361 |
|
| 362 |
+
|
| 363 |
|
| 364 |
71%|███████ | 44/62 [02:26<00:56, 3.16s/it]
|
| 365 |
+
|
| 366 |
+
|
| 367 |
|
| 368 |
73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
|
| 369 |
|
| 370 |
73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
|
| 371 |
74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
|
| 372 |
|
| 373 |
74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
|
| 374 |
76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
|
| 375 |
|
| 376 |
76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
|
| 377 |
77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
|
| 378 |
|
| 379 |
77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
|
| 380 |
+
|
| 381 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 382 |
+
|
| 383 |
33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
|
| 384 |
+
|
| 385 |
50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
|
| 386 |
+
|
| 387 |
67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
|
| 388 |
+
|
| 389 |
83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
|
| 390 |
+
|
| 391 |
|
| 392 |
+
|
| 393 |
|
| 394 |
77%|███████▋ | 48/62 [02:39<00:43, 3.12s/it]
|
| 395 |
+
|
| 396 |
+
|
| 397 |
|
| 398 |
79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
|
| 399 |
|
| 400 |
79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
|
| 401 |
81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
|
| 402 |
|
| 403 |
81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
|
| 404 |
82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
|
| 405 |
|
| 406 |
82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
|
| 407 |
84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
|
| 408 |
|
| 409 |
84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
|
| 410 |
+
|
| 411 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 412 |
+
|
| 413 |
33%|███▎ | 2/6 [00:00<00:00, 8.78it/s]
|
| 414 |
+
|
| 415 |
50%|█████ | 3/6 [00:00<00:00, 6.18it/s]
|
| 416 |
+
|
| 417 |
67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
|
| 418 |
+
|
| 419 |
83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
|
| 420 |
+
|
| 421 |
|
| 422 |
+
|
| 423 |
|
| 424 |
84%|████████▍ | 52/62 [02:52<00:30, 3.09s/it]
|
| 425 |
+
|
| 426 |
+
|
| 427 |
|
| 428 |
85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
|
| 429 |
|
| 430 |
85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
|
| 431 |
87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
|
| 432 |
|
| 433 |
87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
|
| 434 |
89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
|
| 435 |
|
| 436 |
89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
|
| 437 |
90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
|
| 438 |
|
| 439 |
90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
|
| 440 |
+
|
| 441 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 442 |
+
|
| 443 |
33%|███▎ | 2/6 [00:00<00:00, 8.79it/s]
|
| 444 |
+
|
| 445 |
50%|█████ | 3/6 [00:00<00:00, 6.19it/s]
|
| 446 |
+
|
| 447 |
67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
|
| 448 |
+
|
| 449 |
83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
|
| 450 |
+
|
| 451 |
|
| 452 |
+
|
| 453 |
|
| 454 |
90%|█████████ | 56/62 [03:06<00:19, 3.20s/it]
|
| 455 |
+
|
| 456 |
+
|
| 457 |
|
| 458 |
92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
|
| 459 |
|
| 460 |
92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
|
| 461 |
94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
|
| 462 |
|
| 463 |
94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
|
| 464 |
95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
|
| 465 |
|
| 466 |
95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
|
| 467 |
97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
|
| 468 |
|
| 469 |
97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
|
| 470 |
+
|
| 471 |
0%| | 0/6 [00:00<?, ?it/s]
|
| 472 |
+
|
| 473 |
33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
|
| 474 |
+
|
| 475 |
50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
|
| 476 |
+
|
| 477 |
67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
|
| 478 |
+
|
| 479 |
83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
|
| 480 |
+
|
| 481 |
|
| 482 |
+
|
| 483 |
|
| 484 |
97%|█████████▋| 60/62 [03:19<00:06, 3.13s/it]
|
| 485 |
+
|
| 486 |
+
|
| 487 |
|
| 488 |
98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
|
| 489 |
|
| 490 |
98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
|
| 491 |
|
| 492 |
|
dmog/job.output
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Your results will be stored in: /mnt/scratch/users/dhd2000/ft14/dmog/axolotl-test-outputs
|
| 2 |
+
Executing job commands, current working directory is /mnt/scratch/users/dhd2000/ft14/dmog
|
| 3 |
+
/mnt/scratch/users/dhd2000/ft14
|
| 4 |
+
[2024-04-09 08:29:06,912] [INFO] [datasets.<module>:58] [PID:30736] PyTorch version 2.1.2 available.
|
| 5 |
+
[2024-04-09 08:29:08,482] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
| 6 |
+
[2024-04-09 08:29:10,330] [INFO] [axolotl.normalize_config:178] [PID:30736] [RANK:0] GPU memory usage baseline: 0.000GB (+0.640GB misc)[39m
|
| 7 |
+
dP dP dP
|
| 8 |
+
88 88 88
|
| 9 |
+
.d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88
|
| 10 |
+
88' `88 `8bd8' 88' `88 88 88' `88 88 88
|
| 11 |
+
88. .88 .d88b. 88. .88 88 88. .88 88 88
|
| 12 |
+
`88888P8 dP' `dP `88888P' dP `88888P' dP dP
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
[2024-04-09 08:29:10,707] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 / </s>[39m
|
| 17 |
+
[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 / <s>[39m
|
| 18 |
+
[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 / </s>[39m
|
| 19 |
+
[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 / <unk>[39m
|
| 20 |
+
[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.[39m
|
| 21 |
+
[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:191] [PID:30736] [RANK:0] Unable to find prepared dataset in last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e[39m
|
| 22 |
+
[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:192] [PID:30736] [RANK:0] Loading raw datasets...[39m
|
| 23 |
+
[33m[2024-04-09 08:29:10,709] [WARNING] [axolotl.load_tokenized_prepared_datasets:194] [PID:30736] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset.[39m
|
| 24 |
+
[2024-04-09 08:29:10,709] [INFO] [axolotl.load_tokenized_prepared_datasets:201] [PID:30736] [RANK:0] No seed provided, using default seed of 42[39m
|
| 25 |
+
[2024-04-09 08:29:17,092] [INFO] [axolotl.load_tokenized_prepared_datasets:414] [PID:30736] [RANK:0] merging datasets[39m
|
| 26 |
+
[2024-04-09 08:29:17,096] [INFO] [axolotl.log:61] [PID:30736] [RANK:0] dropping attention_mask column[39m
|
| 27 |
+
[2024-04-09 08:29:18,698] [INFO] [axolotl.load_tokenized_prepared_datasets:424] [PID:30736] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e[39m
|
| 28 |
+
[2024-04-09 08:29:18,755] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_tokens: 21468[39m
|
| 29 |
+
[2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] `total_supervised_tokens: 259`[39m
|
| 30 |
+
[2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_steps: 62[39m
|
| 31 |
+
[2024-04-09 08:29:18,756] [INFO] [axolotl.prepare_dataset:124] [PID:30736] [RANK:0] Maximum number of steps set at 62[39m
|
| 32 |
+
[2024-04-09 08:29:18,759] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading tokenizer... mistralai/Mistral-7B-v0.1[39m
|
| 33 |
+
[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 / </s>[39m
|
| 34 |
+
[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 / <s>[39m
|
| 35 |
+
[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 / </s>[39m
|
| 36 |
+
[2024-04-09 08:29:19,029] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 / <unk>[39m
|
| 37 |
+
[2024-04-09 08:29:19,029] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.[39m
|
| 38 |
+
[2024-04-09 08:29:19,029] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading model and peft_config...[39m
|
| 39 |
+
[2024-04-09 08:29:35,702] [INFO] [axolotl.load_model:660] [PID:30736] [RANK:0] GPU memory usage after model load: 4.342GB (+0.138GB cache, +0.942GB misc)[39m
|
| 40 |
+
[2024-04-09 08:29:35,711] [INFO] [axolotl.load_model:701] [PID:30736] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training[39m
|
| 41 |
+
[2024-04-09 08:29:35,713] [INFO] [axolotl.load_model:710] [PID:30736] [RANK:0] converting modules to torch.bfloat16 for flash attention[39m
|
| 42 |
+
[2024-04-09 08:29:35,715] [INFO] [axolotl.load_lora:825] [PID:30736] [RANK:0] found linear modules: ['up_proj', 'q_proj', 'k_proj', 'gate_proj', 'down_proj', 'o_proj', 'v_proj'][39m
|
| 43 |
+
trainable params: 83,886,080 || all params: 7,325,618,176 || trainable%: 1.1451058188485088
|
| 44 |
+
[2024-04-09 08:29:36,348] [INFO] [axolotl.load_model:750] [PID:30736] [RANK:0] GPU memory usage after adapters: 4.670GB (+0.935GB cache, +0.942GB misc)[39m
|
| 45 |
+
[2024-04-09 08:29:36,446] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Pre-saving adapter config to ./qlora-out[39m
|
| 46 |
+
[2024-04-09 08:29:36,459] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Starting trainer...[39m
|
| 47 |
+
{'loss': 6.6367, 'grad_norm': 102.28898620605469, 'learning_rate': 2e-05, 'epoch': 0.08}
|
| 48 |
+
{'eval_loss': 7.300913333892822, 'eval_runtime': 1.3523, 'eval_samples_per_second': 8.873, 'eval_steps_per_second': 4.437, 'epoch': 0.08}
|
| 49 |
+
[2024-04-09 08:29:44,573] [INFO] [axolotl.callbacks.on_step_end:123] [PID:30736] [RANK:0] GPU memory usage while training: 4.843GB (+1.177GB cache, +0.965GB misc)[39m
|
| 50 |
+
{'loss': 7.0616, 'grad_norm': 103.4541015625, 'learning_rate': 4e-05, 'epoch': 0.16}
|
| 51 |
+
{'loss': 4.686, 'grad_norm': 67.47515869140625, 'learning_rate': 6e-05, 'epoch': 0.24}
|
| 52 |
+
{'loss': 2.3866, 'grad_norm': 72.36919403076172, 'learning_rate': 8e-05, 'epoch': 0.32}
|
| 53 |
+
{'eval_loss': 0.7137572169303894, 'eval_runtime': 1.3532, 'eval_samples_per_second': 8.868, 'eval_steps_per_second': 4.434, 'epoch': 0.32}
|
| 54 |
+
{'loss': 0.6844, 'grad_norm': 16.83085060119629, 'learning_rate': 0.0001, 'epoch': 0.4}
|
| 55 |
+
{'loss': 0.914, 'grad_norm': 25.897714614868164, 'learning_rate': 0.00012, 'epoch': 0.48}
|
| 56 |
+
{'loss': 0.63, 'grad_norm': 18.89151382446289, 'learning_rate': 0.00014, 'epoch': 0.56}
|
| 57 |
+
{'loss': 0.948, 'grad_norm': 27.15555763244629, 'learning_rate': 0.00016, 'epoch': 0.64}
|
| 58 |
+
{'eval_loss': 1.0445994138717651, 'eval_runtime': 1.356, 'eval_samples_per_second': 8.85, 'eval_steps_per_second': 4.425, 'epoch': 0.64}
|
| 59 |
+
{'loss': 1.0285, 'grad_norm': 20.812381744384766, 'learning_rate': 0.00018, 'epoch': 0.72}
|
| 60 |
+
{'loss': 1.3756, 'grad_norm': 56.3886604309082, 'learning_rate': 0.0002, 'epoch': 0.8}
|
| 61 |
+
{'loss': 0.5178, 'grad_norm': 6.24803352355957, 'learning_rate': 0.00019981755542233177, 'epoch': 0.88}
|
| 62 |
+
{'loss': 0.6822, 'grad_norm': 8.379430770874023, 'learning_rate': 0.0001992708874098054, 'epoch': 0.96}
|
| 63 |
+
{'eval_loss': 1.3959709405899048, 'eval_runtime': 1.3583, 'eval_samples_per_second': 8.835, 'eval_steps_per_second': 4.417, 'epoch': 0.96}
|
| 64 |
+
{'loss': 1.3762, 'grad_norm': 20.744348526000977, 'learning_rate': 0.00019836199069471437, 'epoch': 1.04}
|
| 65 |
+
{'loss': 0.5248, 'grad_norm': 4.800480842590332, 'learning_rate': 0.0001970941817426052, 'epoch': 1.12}
|
| 66 |
+
{'loss': 0.8094, 'grad_norm': 11.284302711486816, 'learning_rate': 0.00019547208665085457, 'epoch': 1.2}
|
| 67 |
+
{'loss': 0.5222, 'grad_norm': 5.787976264953613, 'learning_rate': 0.0001935016242685415, 'epoch': 1.28}
|
| 68 |
+
{'eval_loss': 0.9023411870002747, 'eval_runtime': 1.3623, 'eval_samples_per_second': 8.808, 'eval_steps_per_second': 4.404, 'epoch': 1.28}
|
| 69 |
+
{'loss': 0.8027, 'grad_norm': 21.48629379272461, 'learning_rate': 0.00019118998459920902, 'epoch': 1.36}
|
| 70 |
+
{'loss': 1.7772, 'grad_norm': 38.0982666015625, 'learning_rate': 0.000188545602565321, 'epoch': 1.44}
|
| 71 |
+
{'loss': 0.7737, 'grad_norm': 10.824837684631348, 'learning_rate': 0.00018557812723014476, 'epoch': 1.52}
|
| 72 |
+
{'loss': 0.534, 'grad_norm': 9.1353120803833, 'learning_rate': 0.00018229838658936564, 'epoch': 1.6}
|
| 73 |
+
{'eval_loss': 0.4847445785999298, 'eval_runtime': 1.3637, 'eval_samples_per_second': 8.799, 'eval_steps_per_second': 4.4, 'epoch': 1.6}
|
| 74 |
+
{'loss': 0.3201, 'grad_norm': 3.8411033153533936, 'learning_rate': 0.00017871834806090501, 'epoch': 1.68}
|
| 75 |
+
{'loss': 2.2541, 'grad_norm': 23.888507843017578, 'learning_rate': 0.00017485107481711012, 'epoch': 1.76}
|
| 76 |
+
{'loss': 0.8177, 'grad_norm': 8.5956392288208, 'learning_rate': 0.00017071067811865476, 'epoch': 1.84}
|
| 77 |
+
{'loss': 0.4624, 'grad_norm': 3.825141191482544, 'learning_rate': 0.00016631226582407952, 'epoch': 1.92}
|
| 78 |
+
{'eval_loss': 0.5740255117416382, 'eval_runtime': 1.3655, 'eval_samples_per_second': 8.788, 'eval_steps_per_second': 4.394, 'epoch': 1.92}
|
| 79 |
+
{'loss': 0.3714, 'grad_norm': 3.558993101119995, 'learning_rate': 0.00016167188726285434, 'epoch': 2.0}
|
| 80 |
+
{'loss': 0.6562, 'grad_norm': 11.759211540222168, 'learning_rate': 0.00015680647467311557, 'epoch': 2.08}
|
| 81 |
+
{'loss': 1.5141, 'grad_norm': 96.2179183959961, 'learning_rate': 0.00015173378141776568, 'epoch': 2.16}
|
| 82 |
+
{'loss': 0.7753, 'grad_norm': 31.022045135498047, 'learning_rate': 0.00014647231720437686, 'epoch': 2.24}
|
| 83 |
+
{'eval_loss': 0.3771994113922119, 'eval_runtime': 1.3676, 'eval_samples_per_second': 8.775, 'eval_steps_per_second': 4.387, 'epoch': 2.24}
|
| 84 |
+
{'loss': 0.2649, 'grad_norm': 3.5004501342773438, 'learning_rate': 0.0001410412805452757, 'epoch': 2.32}
|
| 85 |
+
{'loss': 0.171, 'grad_norm': 5.16464376449585, 'learning_rate': 0.00013546048870425356, 'epoch': 2.4}
|
| 86 |
+
{'loss': 0.9172, 'grad_norm': 25.634010314941406, 'learning_rate': 0.00012975030538552032, 'epoch': 2.48}
|
| 87 |
+
{'loss': 0.3324, 'grad_norm': 7.102908134460449, 'learning_rate': 0.0001239315664287558, 'epoch': 2.56}
|
| 88 |
+
{'eval_loss': 0.29374203085899353, 'eval_runtime': 1.3678, 'eval_samples_per_second': 8.773, 'eval_steps_per_second': 4.387, 'epoch': 2.56}
|
| 89 |
+
{'loss': 0.4932, 'grad_norm': 6.236325263977051, 'learning_rate': 0.0001180255037813906, 'epoch': 2.64}
|
| 90 |
+
{'loss': 0.1284, 'grad_norm': 4.445058345794678, 'learning_rate': 0.0001120536680255323, 'epoch': 2.72}
|
| 91 |
+
{'loss': 0.1547, 'grad_norm': 6.94170618057251, 'learning_rate': 0.00010603784974222861, 'epoch': 2.8}
|
| 92 |
+
{'loss': 0.1973, 'grad_norm': 5.656033039093018, 'learning_rate': 0.0001, 'epoch': 2.88}
|
| 93 |
+
{'eval_loss': 0.5674905180931091, 'eval_runtime': 1.3681, 'eval_samples_per_second': 8.771, 'eval_steps_per_second': 4.386, 'epoch': 2.88}
|
| 94 |
+
{'loss': 0.4884, 'grad_norm': 18.19667625427246, 'learning_rate': 9.396215025777139e-05, 'epoch': 2.96}
|
| 95 |
+
{'loss': 0.5526, 'grad_norm': 17.964893341064453, 'learning_rate': 8.79463319744677e-05, 'epoch': 3.04}
|
| 96 |
+
{'loss': 0.2116, 'grad_norm': 5.015590190887451, 'learning_rate': 8.197449621860943e-05, 'epoch': 3.12}
|
| 97 |
+
{'loss': 0.0843, 'grad_norm': 5.6883225440979, 'learning_rate': 7.606843357124426e-05, 'epoch': 3.2}
|
| 98 |
+
{'eval_loss': 0.2360386848449707, 'eval_runtime': 1.3667, 'eval_samples_per_second': 8.78, 'eval_steps_per_second': 4.39, 'epoch': 3.2}
|
| 99 |
+
{'loss': 0.1158, 'grad_norm': 6.636446475982666, 'learning_rate': 7.024969461447972e-05, 'epoch': 3.28}
|
| 100 |
+
{'loss': 0.2755, 'grad_norm': 4.405576229095459, 'learning_rate': 6.453951129574644e-05, 'epoch': 3.36}
|
| 101 |
+
{'loss': 0.0186, 'grad_norm': 1.6179524660110474, 'learning_rate': 5.8958719454724346e-05, 'epoch': 3.44}
|
| 102 |
+
{'loss': 0.3836, 'grad_norm': 8.783114433288574, 'learning_rate': 5.3527682795623146e-05, 'epoch': 3.52}
|
| 103 |
+
{'eval_loss': 0.13969357311725616, 'eval_runtime': 1.3687, 'eval_samples_per_second': 8.767, 'eval_steps_per_second': 4.384, 'epoch': 3.52}
|
| 104 |
+
{'loss': 0.0141, 'grad_norm': 0.8835445046424866, 'learning_rate': 4.826621858223431e-05, 'epoch': 3.6}
|
| 105 |
+
{'loss': 0.6196, 'grad_norm': 12.678099632263184, 'learning_rate': 4.3193525326884435e-05, 'epoch': 3.68}
|
| 106 |
+
{'loss': 0.0948, 'grad_norm': 5.320870876312256, 'learning_rate': 3.832811273714569e-05, 'epoch': 3.76}
|
| 107 |
+
{'loss': 0.0449, 'grad_norm': 2.7501108646392822, 'learning_rate': 3.36877341759205e-05, 'epoch': 3.84}
|
| 108 |
+
{'eval_loss': 0.2801015079021454, 'eval_runtime': 1.3706, 'eval_samples_per_second': 8.755, 'eval_steps_per_second': 4.378, 'epoch': 3.84}
|
| 109 |
+
{'loss': 0.3026, 'grad_norm': 4.41072940826416, 'learning_rate': 2.9289321881345254e-05, 'epoch': 3.92}
|
| 110 |
+
{'loss': 0.0152, 'grad_norm': 1.2105910778045654, 'learning_rate': 2.514892518288988e-05, 'epoch': 4.0}
|
| 111 |
+
{'loss': 0.0629, 'grad_norm': 4.502895355224609, 'learning_rate': 2.1281651939094992e-05, 'epoch': 4.08}
|
| 112 |
+
{'loss': 0.2246, 'grad_norm': 6.058006286621094, 'learning_rate': 1.7701613410634365e-05, 'epoch': 4.16}
|
| 113 |
+
{'eval_loss': 0.19463467597961426, 'eval_runtime': 1.3725, 'eval_samples_per_second': 8.743, 'eval_steps_per_second': 4.372, 'epoch': 4.16}
|
| 114 |
+
{'loss': 0.0093, 'grad_norm': 0.5118169784545898, 'learning_rate': 1.442187276985526e-05, 'epoch': 4.24}
|
| 115 |
+
{'loss': 0.0148, 'grad_norm': 0.8497004508972168, 'learning_rate': 1.1454397434679021e-05, 'epoch': 4.32}
|
| 116 |
+
{'loss': 0.0392, 'grad_norm': 1.752151608467102, 'learning_rate': 8.810015400790994e-06, 'epoch': 4.4}
|
| 117 |
+
{'loss': 0.229, 'grad_norm': 3.6673429012298584, 'learning_rate': 6.498375731458528e-06, 'epoch': 4.48}
|
| 118 |
+
{'eval_loss': 0.16181980073451996, 'eval_runtime': 1.3705, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 'epoch': 4.48}
|
| 119 |
+
{'loss': 0.1722, 'grad_norm': 2.9522616863250732, 'learning_rate': 4.527913349145441e-06, 'epoch': 4.56}
|
| 120 |
+
{'loss': 0.0295, 'grad_norm': 1.5037487745285034, 'learning_rate': 2.905818257394799e-06, 'epoch': 4.64}
|
| 121 |
+
{'loss': 0.03, 'grad_norm': 1.4181660413742065, 'learning_rate': 1.6380093052856483e-06, 'epoch': 4.72}
|
| 122 |
+
{'loss': 0.3073, 'grad_norm': 9.207091331481934, 'learning_rate': 7.291125901946027e-07, 'epoch': 4.8}
|
| 123 |
+
{'eval_loss': 0.14654164016246796, 'eval_runtime': 1.3704, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 'epoch': 4.8}
|
| 124 |
+
{'loss': 0.032, 'grad_norm': 1.5023337602615356, 'learning_rate': 1.824445776682504e-07, 'epoch': 4.88}
|
| 125 |
+
{'loss': 0.1144, 'grad_norm': 2.882874011993408, 'learning_rate': 0.0, 'epoch': 4.96}
|
| 126 |
+
{'train_runtime': 206.4235, 'train_samples_per_second': 2.403, 'train_steps_per_second': 0.3, 'train_loss': 0.7901421915739775, 'epoch': 4.96}
|
| 127 |
+
[2024-04-09 08:33:03,093] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Training Completed!!! Saving pre-trained model to ./qlora-out[39m
|
| 128 |
+
(PeftModelForCausalLM( (base_model): LoraModel( (model): MistralForCausalLM( (model): MistralModel( (embed_tokens): Embedding(32000, 4096) (layers): ModuleList( (0-31): 32 x MistralDecoderLayer( (self_attn): MistralFlashAttention2( (q_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (k_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=1024, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (v_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=1024, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (o_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (rotary_emb): MistralRotaryEmbedding() ) (mlp): MistralMLP( 
(gate_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=14336, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (up_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=14336, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (down_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=14336, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=14336, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (act_fn): SiLU() ) (input_layernorm): MistralRMSNorm() (post_attention_layernorm): MistralRMSNorm() ) ) (norm): MistralRMSNorm() ) (lm_head): Linear(in_features=4096, out_features=32000, bias=False) ) ) ), LlamaTokenizer(name_or_path='mistralai/Mistral-7B-v0.1', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={ 0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), 1: AddedToken("<s>", rstrip=False, lstrip=False, 
single_word=False, normalized=False, special=True), 2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), })
|
| 129 |
+
|
| 130 |
+
End of job
|
| 131 |
+
Output file has been generated, please check /mnt/scratch/users/dhd2000/ft14/dmog/axolotl-test-outputs/test.output
|
finetune-test.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This script is used to test the model using a dataset
|
| 2 |
+
|
| 3 |
+
# Import the necessary libraries
|
| 4 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 5 |
+
from langchain.memory import ConversationBufferWindowMemory
|
| 6 |
+
from peft import PeftModel
|
| 7 |
+
import torch
|
| 8 |
+
|
| 9 |
+
import json
|
| 10 |
+
import sys
|
| 11 |
+
|
| 12 |
+
# Check if the correct number of arguments are provided
|
| 13 |
+
if len(sys.argv) != 2:
|
| 14 |
+
print("Usage: python finetune-test.py <jsonl_file>")  # name the script that actually exists in this commit
|
| 15 |
+
sys.exit(1)
|
| 16 |
+
|
| 17 |
+
# Get the file path from the command-line argument
|
| 18 |
+
jsonl_file_path = sys.argv[1]
|
| 19 |
+
|
| 20 |
+
# Load the model and tokenizer
|
| 21 |
+
base_model = "mistralai/Mistral-7B-Instruct-v0.2"
|
| 22 |
+
tokenizer = AutoTokenizer.from_pretrained(base_model)
|
| 23 |
+
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
|
| 24 |
+
base_model = AutoModelForCausalLM.from_pretrained(base_model)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
ft_model = PeftModel.from_pretrained(base_model, "./qlora-out")
|
| 28 |
+
# ft_model = ft_model.merge_and_unload()
|
| 29 |
+
ft_model.eval()
|
| 30 |
+
|
| 31 |
+
# Set the device
|
| 32 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 33 |
+
ft_model.to(device)
|
| 34 |
+
|
| 35 |
+
# Read the JSONL file
|
| 36 |
+
with open(jsonl_file_path, "r") as f:
|
| 37 |
+
tp, tn, fp, fn = 0, 0, 0, 0
|
| 38 |
+
for line in f:
|
| 39 |
+
data = json.loads(line)
|
| 40 |
+
user_in = data["input"]
|
| 41 |
+
user_input = f"[INST] ###instruction: Check if the given traffic flow is normal or of an attacker or a victim\n###input: {user_in}\n#output: [/INST]"
|
| 42 |
+
encodings = tokenizer(user_input, return_tensors="pt", padding=True).to(device)
|
| 43 |
+
input_ids = encodings["input_ids"]
|
| 44 |
+
attention_mask = encodings["attention_mask"]
|
| 45 |
+
|
| 46 |
+
output_ids = ft_model.generate(input_ids, attention_mask = attention_mask, max_new_tokens=1000, num_return_sequences=1, do_sample=True, temperature=0.1, top_p=0.9)
|
| 47 |
+
|
| 48 |
+
generated_ids = output_ids[0, input_ids.shape[-1]:]
|
| 49 |
+
|
| 50 |
+
# Decode the output
|
| 51 |
+
response = tokenizer.decode(generated_ids, skip_special_tokens=True).lower()
|
| 52 |
+
|
| 53 |
+
# calculate true positive, true negative, false positive, false negative
|
| 54 |
+
if "normal" not in response and data["output"] == response:
|
| 55 |
+
tp += 1
|
| 56 |
+
elif "normal" in response and data["output"] == response:
|
| 57 |
+
tn += 1
|
| 58 |
+
elif "normal" in response and data["output"] != response:
|
| 59 |
+
fp += 1
|
| 60 |
+
elif "normal" not in response and data["output"] != response:
|
| 61 |
+
fn += 1
|
| 62 |
+
else:
|
| 63 |
+
print(f"Error: {response}, {data['output']}")  # look up the 'output' key; bare `output` is an undefined name
|
| 64 |
+
print(f"User input: {user_in}")
|
| 65 |
+
print(f"Generated response: {response}")
|
| 66 |
+
print(f"Expected response: {data['output']}")  # look up the 'output' key; bare `output` raised NameError on every record
|
| 67 |
+
print()
|
| 68 |
+
|
| 69 |
+
print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
|
last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57a8a6b98277d114990fb441a27d2f777773005e6b7cf57a0ec219fe3bae40b1
|
| 3 |
+
size 259336
|
last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"input_ids": {
|
| 6 |
+
"feature": {
|
| 7 |
+
"dtype": "int32",
|
| 8 |
+
"_type": "Value"
|
| 9 |
+
},
|
| 10 |
+
"_type": "Sequence"
|
| 11 |
+
},
|
| 12 |
+
"labels": {
|
| 13 |
+
"feature": {
|
| 14 |
+
"dtype": "int64",
|
| 15 |
+
"_type": "Value"
|
| 16 |
+
},
|
| 17 |
+
"_type": "Sequence"
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"homepage": "",
|
| 21 |
+
"license": ""
|
| 22 |
+
}
|
last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "992b9317aa372e8e",
|
| 8 |
+
"_format_columns": [
|
| 9 |
+
"input_ids",
|
| 10 |
+
"labels"
|
| 11 |
+
],
|
| 12 |
+
"_format_kwargs": {},
|
| 13 |
+
"_format_type": null,
|
| 14 |
+
"_output_all_columns": false,
|
| 15 |
+
"_split": null
|
| 16 |
+
}
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_uri: file:///mnt/scratch/users/dhd2000/ft14/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/artifacts
|
| 2 |
+
end_time: 1712647983089
|
| 3 |
+
entry_point_name: ''
|
| 4 |
+
experiment_id: '0'
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
run_id: 7e75ece8e18e485db64e4e2d9196e738
|
| 7 |
+
run_name: ./qlora-out
|
| 8 |
+
run_uuid: 7e75ece8e18e485db64e4e2d9196e738
|
| 9 |
+
source_name: ''
|
| 10 |
+
source_type: 4
|
| 11 |
+
source_version: ''
|
| 12 |
+
start_time: 1712647776681
|
| 13 |
+
status: 3
|
| 14 |
+
tags: []
|
| 15 |
+
user_id: dhd2000
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647780322 0.08 1
|
| 2 |
+
1712647781687 0.08 1
|
| 3 |
+
1712647784574 0.16 2
|
| 4 |
+
1712647787465 0.24 3
|
| 5 |
+
1712647790359 0.32 4
|
| 6 |
+
1712647791728 0.32 4
|
| 7 |
+
1712647794625 0.4 5
|
| 8 |
+
1712647797520 0.48 6
|
| 9 |
+
1712647800416 0.56 7
|
| 10 |
+
1712647803312 0.64 8
|
| 11 |
+
1712647804683 0.64 8
|
| 12 |
+
1712647807578 0.72 9
|
| 13 |
+
1712647810474 0.8 10
|
| 14 |
+
1712647813372 0.88 11
|
| 15 |
+
1712647816270 0.96 12
|
| 16 |
+
1712647817642 0.96 12
|
| 17 |
+
1712647820386 1.04 13
|
| 18 |
+
1712647824221 1.12 14
|
| 19 |
+
1712647827138 1.2 15
|
| 20 |
+
1712647830042 1.28 16
|
| 21 |
+
1712647831420 1.28 16
|
| 22 |
+
1712647834333 1.36 17
|
| 23 |
+
1712647837242 1.44 18
|
| 24 |
+
1712647840147 1.52 19
|
| 25 |
+
1712647843070 1.6 20
|
| 26 |
+
1712647844448 1.6 20
|
| 27 |
+
1712647847363 1.68 21
|
| 28 |
+
1712647850290 1.76 22
|
| 29 |
+
1712647853203 1.84 23
|
| 30 |
+
1712647856116 1.92 24
|
| 31 |
+
1712647857496 1.92 24
|
| 32 |
+
1712647860252 2.0 25
|
| 33 |
+
1712647863163 2.08 26
|
| 34 |
+
1712647866855 2.16 27
|
| 35 |
+
1712647869769 2.24 28
|
| 36 |
+
1712647871150 2.24 28
|
| 37 |
+
1712647874087 2.32 29
|
| 38 |
+
1712647877006 2.4 30
|
| 39 |
+
1712647879921 2.48 31
|
| 40 |
+
1712647882836 2.56 32
|
| 41 |
+
1712647884219 2.56 32
|
| 42 |
+
1712647887139 2.64 33
|
| 43 |
+
1712647890053 2.72 34
|
| 44 |
+
1712647892967 2.8 35
|
| 45 |
+
1712647895882 2.88 36
|
| 46 |
+
1712647897265 2.88 36
|
| 47 |
+
1712647900187 2.96 37
|
| 48 |
+
1712647902945 3.04 38
|
| 49 |
+
1712647905861 3.12 39
|
| 50 |
+
1712647909552 3.2 40
|
| 51 |
+
1712647910933 3.2 40
|
| 52 |
+
1712647913855 3.28 41
|
| 53 |
+
1712647916777 3.36 42
|
| 54 |
+
1712647919694 3.44 43
|
| 55 |
+
1712647922609 3.52 44
|
| 56 |
+
1712647923992 3.52 44
|
| 57 |
+
1712647926911 3.6 45
|
| 58 |
+
1712647929833 3.68 46
|
| 59 |
+
1712647932754 3.76 47
|
| 60 |
+
1712647935680 3.84 48
|
| 61 |
+
1712647937065 3.84 48
|
| 62 |
+
1712647939993 3.92 49
|
| 63 |
+
1712647942758 4.0 50
|
| 64 |
+
1712647945681 4.08 51
|
| 65 |
+
1712647948606 4.16 52
|
| 66 |
+
1712647949993 4.16 52
|
| 67 |
+
1712647953856 4.24 53
|
| 68 |
+
1712647956779 4.32 54
|
| 69 |
+
1712647959701 4.4 55
|
| 70 |
+
1712647962622 4.48 56
|
| 71 |
+
1712647964007 4.48 56
|
| 72 |
+
1712647966930 4.56 57
|
| 73 |
+
1712647969855 4.64 58
|
| 74 |
+
1712647972779 4.72 59
|
| 75 |
+
1712647975702 4.8 60
|
| 76 |
+
1712647977087 4.8 60
|
| 77 |
+
1712647980013 4.88 61
|
| 78 |
+
1712647982936 4.96 62
|
| 79 |
+
1712647983084 4.96 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647781687 7.300913333892822 1
|
| 2 |
+
1712647791728 0.7137572169303894 4
|
| 3 |
+
1712647804683 1.0445994138717651 8
|
| 4 |
+
1712647817642 1.3959709405899048 12
|
| 5 |
+
1712647831420 0.9023411870002747 16
|
| 6 |
+
1712647844448 0.4847445785999298 20
|
| 7 |
+
1712647857496 0.5740255117416382 24
|
| 8 |
+
1712647871150 0.3771994113922119 28
|
| 9 |
+
1712647884219 0.29374203085899353 32
|
| 10 |
+
1712647897265 0.5674905180931091 36
|
| 11 |
+
1712647910933 0.2360386848449707 40
|
| 12 |
+
1712647923992 0.13969357311725616 44
|
| 13 |
+
1712647937065 0.2801015079021454 48
|
| 14 |
+
1712647949993 0.19463467597961426 52
|
| 15 |
+
1712647964007 0.16181980073451996 56
|
| 16 |
+
1712647977087 0.14654164016246796 60
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647781687 1.3523 1
|
| 2 |
+
1712647791728 1.3532 4
|
| 3 |
+
1712647804683 1.356 8
|
| 4 |
+
1712647817642 1.3583 12
|
| 5 |
+
1712647831420 1.3623 16
|
| 6 |
+
1712647844448 1.3637 20
|
| 7 |
+
1712647857496 1.3655 24
|
| 8 |
+
1712647871150 1.3676 28
|
| 9 |
+
1712647884219 1.3678 32
|
| 10 |
+
1712647897265 1.3681 36
|
| 11 |
+
1712647910933 1.3667 40
|
| 12 |
+
1712647923992 1.3687 44
|
| 13 |
+
1712647937065 1.3706 48
|
| 14 |
+
1712647949993 1.3725 52
|
| 15 |
+
1712647964007 1.3705 56
|
| 16 |
+
1712647977087 1.3704 60
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647781687 8.873 1
|
| 2 |
+
1712647791728 8.868 4
|
| 3 |
+
1712647804683 8.85 8
|
| 4 |
+
1712647817642 8.835 12
|
| 5 |
+
1712647831420 8.808 16
|
| 6 |
+
1712647844448 8.799 20
|
| 7 |
+
1712647857496 8.788 24
|
| 8 |
+
1712647871150 8.775 28
|
| 9 |
+
1712647884219 8.773 32
|
| 10 |
+
1712647897265 8.771 36
|
| 11 |
+
1712647910933 8.78 40
|
| 12 |
+
1712647923992 8.767 44
|
| 13 |
+
1712647937065 8.755 48
|
| 14 |
+
1712647949993 8.743 52
|
| 15 |
+
1712647964007 8.756 56
|
| 16 |
+
1712647977087 8.756 60
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647781687 4.437 1
|
| 2 |
+
1712647791728 4.434 4
|
| 3 |
+
1712647804683 4.425 8
|
| 4 |
+
1712647817642 4.417 12
|
| 5 |
+
1712647831420 4.404 16
|
| 6 |
+
1712647844448 4.4 20
|
| 7 |
+
1712647857496 4.394 24
|
| 8 |
+
1712647871150 4.387 28
|
| 9 |
+
1712647884219 4.387 32
|
| 10 |
+
1712647897265 4.386 36
|
| 11 |
+
1712647910933 4.39 40
|
| 12 |
+
1712647923992 4.384 44
|
| 13 |
+
1712647937065 4.378 48
|
| 14 |
+
1712647949993 4.372 52
|
| 15 |
+
1712647964007 4.378 56
|
| 16 |
+
1712647977087 4.378 60
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647780322 102.28898620605469 1
|
| 2 |
+
1712647784574 103.4541015625 2
|
| 3 |
+
1712647787465 67.47515869140625 3
|
| 4 |
+
1712647790359 72.36919403076172 4
|
| 5 |
+
1712647794625 16.83085060119629 5
|
| 6 |
+
1712647797520 25.897714614868164 6
|
| 7 |
+
1712647800416 18.89151382446289 7
|
| 8 |
+
1712647803312 27.15555763244629 8
|
| 9 |
+
1712647807578 20.812381744384766 9
|
| 10 |
+
1712647810474 56.3886604309082 10
|
| 11 |
+
1712647813372 6.24803352355957 11
|
| 12 |
+
1712647816270 8.379430770874023 12
|
| 13 |
+
1712647820386 20.744348526000977 13
|
| 14 |
+
1712647824221 4.800480842590332 14
|
| 15 |
+
1712647827138 11.284302711486816 15
|
| 16 |
+
1712647830042 5.787976264953613 16
|
| 17 |
+
1712647834333 21.48629379272461 17
|
| 18 |
+
1712647837242 38.0982666015625 18
|
| 19 |
+
1712647840147 10.824837684631348 19
|
| 20 |
+
1712647843070 9.1353120803833 20
|
| 21 |
+
1712647847363 3.8411033153533936 21
|
| 22 |
+
1712647850290 23.888507843017578 22
|
| 23 |
+
1712647853203 8.5956392288208 23
|
| 24 |
+
1712647856116 3.825141191482544 24
|
| 25 |
+
1712647860252 3.558993101119995 25
|
| 26 |
+
1712647863163 11.759211540222168 26
|
| 27 |
+
1712647866855 96.2179183959961 27
|
| 28 |
+
1712647869769 31.022045135498047 28
|
| 29 |
+
1712647874087 3.5004501342773438 29
|
| 30 |
+
1712647877006 5.16464376449585 30
|
| 31 |
+
1712647879921 25.634010314941406 31
|
| 32 |
+
1712647882836 7.102908134460449 32
|
| 33 |
+
1712647887139 6.236325263977051 33
|
| 34 |
+
1712647890053 4.445058345794678 34
|
| 35 |
+
1712647892967 6.94170618057251 35
|
| 36 |
+
1712647895882 5.656033039093018 36
|
| 37 |
+
1712647900187 18.19667625427246 37
|
| 38 |
+
1712647902945 17.964893341064453 38
|
| 39 |
+
1712647905861 5.015590190887451 39
|
| 40 |
+
1712647909552 5.6883225440979 40
|
| 41 |
+
1712647913855 6.636446475982666 41
|
| 42 |
+
1712647916777 4.405576229095459 42
|
| 43 |
+
1712647919694 1.6179524660110474 43
|
| 44 |
+
1712647922609 8.783114433288574 44
|
| 45 |
+
1712647926911 0.8835445046424866 45
|
| 46 |
+
1712647929833 12.678099632263184 46
|
| 47 |
+
1712647932754 5.320870876312256 47
|
| 48 |
+
1712647935680 2.7501108646392822 48
|
| 49 |
+
1712647939993 4.41072940826416 49
|
| 50 |
+
1712647942758 1.2105910778045654 50
|
| 51 |
+
1712647945681 4.502895355224609 51
|
| 52 |
+
1712647948606 6.058006286621094 52
|
| 53 |
+
1712647953856 0.5118169784545898 53
|
| 54 |
+
1712647956779 0.8497004508972168 54
|
| 55 |
+
1712647959701 1.752151608467102 55
|
| 56 |
+
1712647962622 3.6673429012298584 56
|
| 57 |
+
1712647966930 2.9522616863250732 57
|
| 58 |
+
1712647969855 1.5037487745285034 58
|
| 59 |
+
1712647972779 1.4181660413742065 59
|
| 60 |
+
1712647975702 9.207091331481934 60
|
| 61 |
+
1712647980013 1.5023337602615356 61
|
| 62 |
+
1712647982936 2.882874011993408 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647780322 2e-05 1
|
| 2 |
+
1712647784574 4e-05 2
|
| 3 |
+
1712647787465 6e-05 3
|
| 4 |
+
1712647790359 8e-05 4
|
| 5 |
+
1712647794625 0.0001 5
|
| 6 |
+
1712647797520 0.00012 6
|
| 7 |
+
1712647800416 0.00014 7
|
| 8 |
+
1712647803312 0.00016 8
|
| 9 |
+
1712647807578 0.00018 9
|
| 10 |
+
1712647810474 0.0002 10
|
| 11 |
+
1712647813372 0.00019981755542233177 11
|
| 12 |
+
1712647816270 0.0001992708874098054 12
|
| 13 |
+
1712647820386 0.00019836199069471437 13
|
| 14 |
+
1712647824221 0.0001970941817426052 14
|
| 15 |
+
1712647827138 0.00019547208665085457 15
|
| 16 |
+
1712647830042 0.0001935016242685415 16
|
| 17 |
+
1712647834333 0.00019118998459920902 17
|
| 18 |
+
1712647837242 0.000188545602565321 18
|
| 19 |
+
1712647840147 0.00018557812723014476 19
|
| 20 |
+
1712647843070 0.00018229838658936564 20
|
| 21 |
+
1712647847363 0.00017871834806090501 21
|
| 22 |
+
1712647850290 0.00017485107481711012 22
|
| 23 |
+
1712647853203 0.00017071067811865476 23
|
| 24 |
+
1712647856116 0.00016631226582407952 24
|
| 25 |
+
1712647860252 0.00016167188726285434 25
|
| 26 |
+
1712647863163 0.00015680647467311557 26
|
| 27 |
+
1712647866855 0.00015173378141776568 27
|
| 28 |
+
1712647869769 0.00014647231720437686 28
|
| 29 |
+
1712647874087 0.0001410412805452757 29
|
| 30 |
+
1712647877006 0.00013546048870425356 30
|
| 31 |
+
1712647879921 0.00012975030538552032 31
|
| 32 |
+
1712647882836 0.0001239315664287558 32
|
| 33 |
+
1712647887139 0.0001180255037813906 33
|
| 34 |
+
1712647890053 0.0001120536680255323 34
|
| 35 |
+
1712647892967 0.00010603784974222861 35
|
| 36 |
+
1712647895882 0.0001 36
|
| 37 |
+
1712647900187 9.396215025777139e-05 37
|
| 38 |
+
1712647902945 8.79463319744677e-05 38
|
| 39 |
+
1712647905861 8.197449621860943e-05 39
|
| 40 |
+
1712647909552 7.606843357124426e-05 40
|
| 41 |
+
1712647913855 7.024969461447972e-05 41
|
| 42 |
+
1712647916777 6.453951129574644e-05 42
|
| 43 |
+
1712647919694 5.8958719454724346e-05 43
|
| 44 |
+
1712647922609 5.3527682795623146e-05 44
|
| 45 |
+
1712647926911 4.826621858223431e-05 45
|
| 46 |
+
1712647929833 4.3193525326884435e-05 46
|
| 47 |
+
1712647932754 3.832811273714569e-05 47
|
| 48 |
+
1712647935680 3.36877341759205e-05 48
|
| 49 |
+
1712647939993 2.9289321881345254e-05 49
|
| 50 |
+
1712647942758 2.514892518288988e-05 50
|
| 51 |
+
1712647945681 2.1281651939094992e-05 51
|
| 52 |
+
1712647948606 1.7701613410634365e-05 52
|
| 53 |
+
1712647953856 1.442187276985526e-05 53
|
| 54 |
+
1712647956779 1.1454397434679021e-05 54
|
| 55 |
+
1712647959701 8.810015400790994e-06 55
|
| 56 |
+
1712647962622 6.498375731458528e-06 56
|
| 57 |
+
1712647966930 4.527913349145441e-06 57
|
| 58 |
+
1712647969855 2.905818257394799e-06 58
|
| 59 |
+
1712647972779 1.6380093052856483e-06 59
|
| 60 |
+
1712647975702 7.291125901946027e-07 60
|
| 61 |
+
1712647980013 1.824445776682504e-07 61
|
| 62 |
+
1712647982936 0.0 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1712647780322 6.6367 1
|
| 2 |
+
1712647784574 7.0616 2
|
| 3 |
+
1712647787465 4.686 3
|
| 4 |
+
1712647790359 2.3866 4
|
| 5 |
+
1712647794625 0.6844 5
|
| 6 |
+
1712647797520 0.914 6
|
| 7 |
+
1712647800416 0.63 7
|
| 8 |
+
1712647803312 0.948 8
|
| 9 |
+
1712647807578 1.0285 9
|
| 10 |
+
1712647810474 1.3756 10
|
| 11 |
+
1712647813372 0.5178 11
|
| 12 |
+
1712647816270 0.6822 12
|
| 13 |
+
1712647820386 1.3762 13
|
| 14 |
+
1712647824221 0.5248 14
|
| 15 |
+
1712647827138 0.8094 15
|
| 16 |
+
1712647830042 0.5222 16
|
| 17 |
+
1712647834333 0.8027 17
|
| 18 |
+
1712647837242 1.7772 18
|
| 19 |
+
1712647840147 0.7737 19
|
| 20 |
+
1712647843070 0.534 20
|
| 21 |
+
1712647847363 0.3201 21
|
| 22 |
+
1712647850290 2.2541 22
|
| 23 |
+
1712647853203 0.8177 23
|
| 24 |
+
1712647856116 0.4624 24
|
| 25 |
+
1712647860252 0.3714 25
|
| 26 |
+
1712647863163 0.6562 26
|
| 27 |
+
1712647866855 1.5141 27
|
| 28 |
+
1712647869769 0.7753 28
|
| 29 |
+
1712647874087 0.2649 29
|
| 30 |
+
1712647877006 0.171 30
|
| 31 |
+
1712647879921 0.9172 31
|
| 32 |
+
1712647882836 0.3324 32
|
| 33 |
+
1712647887139 0.4932 33
|
| 34 |
+
1712647890053 0.1284 34
|
| 35 |
+
1712647892967 0.1547 35
|
| 36 |
+
1712647895882 0.1973 36
|
| 37 |
+
1712647900187 0.4884 37
|
| 38 |
+
1712647902945 0.5526 38
|
| 39 |
+
1712647905861 0.2116 39
|
| 40 |
+
1712647909552 0.0843 40
|
| 41 |
+
1712647913855 0.1158 41
|
| 42 |
+
1712647916777 0.2755 42
|
| 43 |
+
1712647919694 0.0186 43
|
| 44 |
+
1712647922609 0.3836 44
|
| 45 |
+
1712647926911 0.0141 45
|
| 46 |
+
1712647929833 0.6196 46
|
| 47 |
+
1712647932754 0.0948 47
|
| 48 |
+
1712647935680 0.0449 48
|
| 49 |
+
1712647939993 0.3026 49
|
| 50 |
+
1712647942758 0.0152 50
|
| 51 |
+
1712647945681 0.0629 51
|
| 52 |
+
1712647948606 0.2246 52
|
| 53 |
+
1712647953856 0.0093 53
|
| 54 |
+
1712647956779 0.0148 54
|
| 55 |
+
1712647959701 0.0392 55
|
| 56 |
+
1712647962622 0.229 56
|
| 57 |
+
1712647966930 0.1722 57
|
| 58 |
+
1712647969855 0.0295 58
|
| 59 |
+
1712647972779 0.03 59
|
| 60 |
+
1712647975702 0.3073 60
|
| 61 |
+
1712647980013 0.032 61
|
| 62 |
+
1712647982936 0.1144 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1712647983084 5437004879757312.0 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1712647983084 0.7901421915739775 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1712647983084 206.4235 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1712647983084 2.403 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1712647983084 0.3 62
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
mistralai/Mistral-7B-v0.1
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
False
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0.9
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0.999
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1e-08
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
False
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
['MistralForCausalLM']
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0.0
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
False
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
None
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
None
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
pharaouk/dharma-1/dharma_1_mini.json
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2048
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
eval
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
True
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
False
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
None
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
None
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
None
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
None
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
False
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
False
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
True
|
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
None
|