caffeinatedcherrychic commited on
Commit
9955c09
·
verified ·
1 Parent(s): fbbacea

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. config.yaml +79 -0
  2. dmog/axolotl-test-outputs/test.output +5 -0
  3. dmog/job.error +162 -0
  4. dmog/job.output +131 -0
  5. finetune-test.py +72 -0
  6. last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow +3 -0
  7. last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json +22 -0
  8. last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json +16 -0
  9. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml +15 -0
  10. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch +79 -0
  11. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss +16 -0
  12. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime +16 -0
  13. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second +16 -0
  14. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second +16 -0
  15. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm +62 -0
  16. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate +62 -0
  17. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss +62 -0
  18. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos +1 -0
  19. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss +1 -0
  20. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime +1 -0
  21. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second +1 -0
  22. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second +1 -0
  23. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path +1 -0
  24. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config +1 -0
  25. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor +1 -0
  26. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1 +1 -0
  27. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2 +1 -0
  28. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon +1 -0
  29. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention +1 -0
  30. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures +1 -0
  31. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout +1 -0
  32. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size +1 -0
  33. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids +1 -0
  34. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens +1 -0
  35. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset +1 -0
  36. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len +1 -0
  37. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split +1 -0
  38. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16 +1 -0
  39. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval +1 -0
  40. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id +1 -0
  41. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward +1 -0
  42. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio +1 -0
  43. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio +1 -0
  44. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size +1 -0
  45. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed +1 -0
  46. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last +1 -0
  47. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers +1 -0
  48. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers +1 -0
  49. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory +1 -0
  50. mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor +1 -0
config.yaml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: mistralai/Mistral-7B-v0.1
2
+ model_type: MistralForCausalLM
3
+ tokenizer_type: LlamaTokenizer
4
+
5
+ load_in_8bit: false
6
+ load_in_4bit: true
7
+ strict: false
8
+
9
+ datasets:
10
+ - path: caffeinatedcherrychic/cidds-agg-balanced
11
+ type: alpaca
12
+ dataset_prepared_path: last_run_prepared
13
+ val_set_size: 0.1
14
+ output_dir: ./qlora-out
15
+
16
+ adapter: qlora
17
+ lora_model_dir:
18
+
19
+ sequence_len: 256
20
+ sample_packing: false
21
+ pad_to_sequence_len: true
22
+
23
+ lora_r: 32
24
+ lora_alpha: 64
25
+ lora_dropout: 0.05
26
+ lora_target_linear: true
27
+ lora_fan_in_fan_out:
28
+ lora_target_modules:
29
+ - gate_proj
30
+ - down_proj
31
+ - up_proj
32
+ - q_proj
33
+ - v_proj
34
+ - k_proj
35
+ - o_proj
36
+
37
+ wandb_project:
38
+ wandb_entity:
39
+ wandb_watch:
40
+ wandb_name:
41
+ wandb_log_model:
42
+
43
+ gradient_accumulation_steps: 4
44
+ micro_batch_size: 2
45
+ num_epochs: 5
46
+ optimizer: adamw_bnb_8bit
47
+ lr_scheduler: cosine
48
+ learning_rate: 0.0002
49
+
50
+ train_on_inputs: false
51
+ group_by_length: false
52
+ bf16: true
53
+ fp16: false
54
+ tf32: false
55
+
56
+ gradient_checkpointing: true
57
+ early_stopping_patience:
58
+ resume_from_checkpoint:
59
+ local_rank:
60
+ logging_steps: 1
61
+ xformers_attention:
62
+ flash_attention: true
63
+
64
+ loss_watchdog_threshold: 5.0
65
+ loss_watchdog_patience: 3
66
+
67
+ max_steps: 500
68
+ warmup_steps: 10
69
+ evals_per_epoch: 4
70
+ eval_table_size:
71
+ eval_max_new_tokens: 1
72
+ saves_per_epoch: 1
73
+ debug:
74
+ deepspeed:
75
+ weight_decay: 0.001
76
+ fsdp:
77
+ fsdp_config:
78
+ special_tokens:
79
+
dmog/axolotl-test-outputs/test.output ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Hello, dhruti
2
+ #######
3
+ Finetuning
4
+ /mnt/scratch/users/dhd2000/ft14
5
+ #######
dmog/job.error ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/62 [00:00<?, ?it/s]
1
  2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
2
 
3
  2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
 
4
  0%| | 0/6 [00:00<?, ?it/s]
 
5
  33%|███▎ | 2/6 [00:00<00:00, 8.95it/s]
 
6
  50%|█████ | 3/6 [00:00<00:00, 6.32it/s]
 
7
  67%|██████▋ | 4/6 [00:00<00:00, 5.47it/s]
 
8
  83%|████████▎ | 5/6 [00:00<00:00, 5.07it/s]
 
9
 
 
10
 
11
  2%|▏ | 1/62 [00:04<03:15, 3.20s/it]
 
 
12
 
13
  3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
14
 
15
  3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
16
  5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
17
 
18
  5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
19
  6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
20
 
21
  6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
 
22
  0%| | 0/6 [00:00<?, ?it/s]
 
23
  33%|███▎ | 2/6 [00:00<00:00, 8.91it/s]
 
24
  50%|█████ | 3/6 [00:00<00:00, 6.29it/s]
 
25
  67%|██████▋ | 4/6 [00:00<00:00, 5.45it/s]
 
26
  83%|████████▎ | 5/6 [00:00<00:00, 5.06it/s]
 
27
 
 
28
 
29
  6%|▋ | 4/62 [00:14<03:05, 3.20s/it]
 
 
30
 
31
  8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
32
 
33
  8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
34
  10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
35
 
36
  10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
37
  11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
38
 
39
  11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
40
  13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
41
 
42
  13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
 
43
  0%| | 0/6 [00:00<?, ?it/s]
 
44
  33%|███▎ | 2/6 [00:00<00:00, 8.92it/s]
 
45
  50%|█████ | 3/6 [00:00<00:00, 6.28it/s]
 
46
  67%|██████▋ | 4/6 [00:00<00:00, 5.44it/s]
 
47
  83%|████████▎ | 5/6 [00:00<00:00, 5.04it/s]
 
48
 
 
49
 
50
  13%|█▎ | 8/62 [00:27<02:47, 3.10s/it]
 
 
51
 
52
  15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
53
 
54
  15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
55
  16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
56
 
57
  16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
58
  18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
59
 
60
  18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
61
  19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
62
 
63
  19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
 
64
  0%| | 0/6 [00:00<?, ?it/s]
 
65
  33%|███▎ | 2/6 [00:00<00:00, 8.89it/s]
 
66
  50%|█████ | 3/6 [00:00<00:00, 6.27it/s]
 
67
  67%|██████▋ | 4/6 [00:00<00:00, 5.43it/s]
 
68
  83%|████████▎ | 5/6 [00:00<00:00, 5.03it/s]
 
69
 
 
70
 
71
  19%|█▉ | 12/62 [00:40<02:34, 3.09s/it]
 
 
72
 
73
  21%|██ | 13/62 [00:43<02:46, 3.40s/it]
74
 
75
  21%|██ | 13/62 [00:43<02:46, 3.40s/it]
76
  23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
77
 
78
  23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
79
  24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
80
 
81
  24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
82
  26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
83
 
84
  26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
 
85
  0%| | 0/6 [00:00<?, ?it/s]
 
86
  33%|███▎ | 2/6 [00:00<00:00, 8.86it/s]
 
87
  50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
 
88
  67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
 
89
  83%|████████▎ | 5/6 [00:00<00:00, 5.02it/s]
 
90
 
 
91
 
92
  26%|██▌ | 16/62 [00:54<02:27, 3.21s/it]
 
 
93
 
94
  27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
95
 
96
  27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
97
  29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
98
 
99
  29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
100
  31%|███ | 19/62 [01:03<02:18, 3.22s/it]
101
 
102
  31%|███ | 19/62 [01:03<02:18, 3.22s/it]
103
  32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
104
 
105
  32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
 
106
  0%| | 0/6 [00:00<?, ?it/s]
 
107
  33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
 
108
  50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
 
109
  67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
 
110
  83%|████████▎ | 5/6 [00:00<00:00, 5.01it/s]
 
111
 
 
112
 
113
  32%|███▏ | 20/62 [01:07<02:11, 3.13s/it]
 
 
114
 
115
  34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
116
 
117
  34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
118
  35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
119
 
120
  35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
121
  37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
122
 
123
  37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
124
  39%|███▊ | 24/62 [01:18<01:58, 3.11s/it]
125
 
126
  39%|███▊ | 24/62 [01:19<01:58, 3.11s/it]
 
127
  0%| | 0/6 [00:00<?, ?it/s]
 
128
  33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
 
129
  50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
 
130
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
 
131
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
 
132
 
 
133
 
134
  39%|███▊ | 24/62 [01:20<01:58, 3.11s/it]
 
 
135
 
136
  40%|████ | 25/62 [01:23<02:06, 3.42s/it]
137
 
138
  40%|████ | 25/62 [01:23<02:06, 3.42s/it]
139
  42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
140
 
141
  42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
142
  44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
143
 
144
  44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
145
  45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
146
 
147
  45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
 
148
  0%| | 0/6 [00:00<?, ?it/s]
 
149
  33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
 
150
  50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
 
151
  67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
 
152
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
 
153
 
 
154
 
155
  45%|████▌ | 28/62 [01:34<01:50, 3.25s/it]
 
 
156
 
157
  47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
158
 
159
  47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
160
  48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
161
 
162
  48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
163
  50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
164
 
165
  50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
166
  52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
167
 
168
  52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
 
169
  0%| | 0/6 [00:00<?, ?it/s]
 
170
  33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
 
171
  50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
 
172
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
 
173
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
 
174
 
 
175
 
176
  52%|█████▏ | 32/62 [01:47<01:34, 3.14s/it]
 
 
177
 
178
  53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
179
 
180
  53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
181
  55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
182
 
183
  55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
184
  56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
185
 
186
  56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
187
  58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
188
 
189
  58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
 
190
  0%| | 0/6 [00:00<?, ?it/s]
 
191
  33%|███▎ | 2/6 [00:00<00:00, 8.82it/s]
 
192
  50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
 
193
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
 
194
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
 
195
 
 
196
 
197
  58%|█████▊ | 36/62 [02:00<01:20, 3.11s/it]
 
 
198
 
199
  60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
200
 
201
  60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
202
  61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
203
 
204
  61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
205
  63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
206
 
207
  63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
208
  65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
209
 
210
  65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
 
211
  0%| | 0/6 [00:00<?, ?it/s]
 
212
  33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
 
213
  50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
 
214
  67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
 
215
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
 
216
 
 
217
 
218
  65%|██████▍ | 40/62 [02:13<01:12, 3.32s/it]
 
 
219
 
220
  66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
221
 
222
  66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
223
  68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
224
 
225
  68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
226
  69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
227
 
228
  69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
229
  71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
230
 
231
  71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
 
232
  0%| | 0/6 [00:00<?, ?it/s]
 
233
  33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
 
234
  50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
 
235
  67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
 
236
  83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
 
237
 
 
238
 
239
  71%|███████ | 44/62 [02:26<00:56, 3.16s/it]
 
 
240
 
241
  73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
242
 
243
  73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
244
  74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
245
 
246
  74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
247
  76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
248
 
249
  76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
250
  77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
251
 
252
  77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
 
253
  0%| | 0/6 [00:00<?, ?it/s]
 
254
  33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
 
255
  50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
 
256
  67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
 
257
  83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
 
258
 
 
259
 
260
  77%|███████▋ | 48/62 [02:39<00:43, 3.12s/it]
 
 
261
 
262
  79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
263
 
264
  79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
265
  81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
266
 
267
  81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
268
  82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
269
 
270
  82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
271
  84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
272
 
273
  84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
 
274
  0%| | 0/6 [00:00<?, ?it/s]
 
275
  33%|███▎ | 2/6 [00:00<00:00, 8.78it/s]
 
276
  50%|█████ | 3/6 [00:00<00:00, 6.18it/s]
 
277
  67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
 
278
  83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
 
279
 
 
280
 
281
  84%|████████▍ | 52/62 [02:52<00:30, 3.09s/it]
 
 
282
 
283
  85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
284
 
285
  85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
286
  87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
287
 
288
  87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
289
  89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
290
 
291
  89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
292
  90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
293
 
294
  90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
 
295
  0%| | 0/6 [00:00<?, ?it/s]
 
296
  33%|███▎ | 2/6 [00:00<00:00, 8.79it/s]
 
297
  50%|█████ | 3/6 [00:00<00:00, 6.19it/s]
 
298
  67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
 
299
  83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
 
300
 
 
301
 
302
  90%|█████████ | 56/62 [03:06<00:19, 3.20s/it]
 
 
303
 
304
  92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
305
 
306
  92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
307
  94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
308
 
309
  94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
310
  95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
311
 
312
  95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
313
  97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
314
 
315
  97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
 
316
  0%| | 0/6 [00:00<?, ?it/s]
 
317
  33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
 
318
  50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
 
319
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
 
320
  83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
 
321
 
 
322
 
323
  97%|█████████▋| 60/62 [03:19<00:06, 3.13s/it]
 
 
324
 
325
  98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
326
 
327
  98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
328
 
329
 
 
1
+ mpi/openmpi/4.1.5/gcc-4.8.5
2
+ | -- libs/gcc/system
3
+ | * --> OK
4
+ |
5
+ OK
6
+ mpi/openmpi/4.1.5/gcc-4.8.5 ... UNLOADING --> OK
7
+ libs/gcc/system ... UNLOADING --> OK
8
+ The following values were not passed to `accelerate launch` and had defaults used instead:
9
+ `--num_processes` was set to a value of `1`
10
+ `--num_machines` was set to a value of `1`
11
+ `--mixed_precision` was set to a value of `'no'`
12
+ `--dynamo_backend` was set to a value of `'no'`
13
+ To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
14
+
15
+
16
+
17
+
18
+
19
  0%| | 0/62 [00:00<?, ?it/s]
20
  2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
21
 
22
  2%|▏ | 1/62 [00:03<03:15, 3.20s/it]
23
+
24
  0%| | 0/6 [00:00<?, ?it/s]
25
+
26
  33%|███▎ | 2/6 [00:00<00:00, 8.95it/s]
27
+
28
  50%|█████ | 3/6 [00:00<00:00, 6.32it/s]
29
+
30
  67%|██████▋ | 4/6 [00:00<00:00, 5.47it/s]
31
+
32
  83%|████████▎ | 5/6 [00:00<00:00, 5.07it/s]
33
+
34
 
35
+
36
 
37
  2%|▏ | 1/62 [00:04<03:15, 3.20s/it]
38
+
39
+
40
 
41
  3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
42
 
43
  3%|▎ | 2/62 [00:07<03:49, 3.82s/it]
44
  5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
45
 
46
  5%|▍ | 3/62 [00:10<03:20, 3.40s/it]
47
  6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
48
 
49
  6%|▋ | 4/62 [00:13<03:05, 3.20s/it]
50
+
51
  0%| | 0/6 [00:00<?, ?it/s]
52
+
53
  33%|███▎ | 2/6 [00:00<00:00, 8.91it/s]
54
+
55
  50%|█████ | 3/6 [00:00<00:00, 6.29it/s]
56
+
57
  67%|██████▋ | 4/6 [00:00<00:00, 5.45it/s]
58
+
59
  83%|████████▎ | 5/6 [00:00<00:00, 5.06it/s]
60
+
61
 
62
+
63
 
64
  6%|▋ | 4/62 [00:14<03:05, 3.20s/it]
65
+
66
+
67
 
68
  8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
69
 
70
  8%|▊ | 5/62 [00:17<03:24, 3.58s/it]
71
  10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
72
 
73
  10%|▉ | 6/62 [00:20<03:07, 3.35s/it]
74
  11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
75
 
76
  11%|█▏ | 7/62 [00:23<02:56, 3.20s/it]
77
  13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
78
 
79
  13%|█▎ | 8/62 [00:26<02:47, 3.10s/it]
80
+
81
  0%| | 0/6 [00:00<?, ?it/s]
82
+
83
  33%|███▎ | 2/6 [00:00<00:00, 8.92it/s]
84
+
85
  50%|█████ | 3/6 [00:00<00:00, 6.28it/s]
86
+
87
  67%|██████▋ | 4/6 [00:00<00:00, 5.44it/s]
88
+
89
  83%|████████▎ | 5/6 [00:00<00:00, 5.04it/s]
90
+
91
 
92
+
93
 
94
  13%|█▎ | 8/62 [00:27<02:47, 3.10s/it]
95
+
96
+
97
 
98
  15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
99
 
100
  15%|█▍ | 9/62 [00:30<03:03, 3.47s/it]
101
  16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
102
 
103
  16%|█▌ | 10/62 [00:33<02:51, 3.29s/it]
104
  18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
105
 
106
  18%|█▊ | 11/62 [00:36<02:41, 3.17s/it]
107
  19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
108
 
109
  19%|█▉ | 12/62 [00:39<02:34, 3.09s/it]
110
+
111
  0%| | 0/6 [00:00<?, ?it/s]
112
+
113
  33%|███▎ | 2/6 [00:00<00:00, 8.89it/s]
114
+
115
  50%|█████ | 3/6 [00:00<00:00, 6.27it/s]
116
+
117
  67%|██████▋ | 4/6 [00:00<00:00, 5.43it/s]
118
+
119
  83%|████████▎ | 5/6 [00:00<00:00, 5.03it/s]
120
+
121
 
122
+
123
 
124
  19%|█▉ | 12/62 [00:40<02:34, 3.09s/it]
125
+
126
+
127
 
128
  21%|██ | 13/62 [00:43<02:46, 3.40s/it]
129
 
130
  21%|██ | 13/62 [00:43<02:46, 3.40s/it]
131
  23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
132
 
133
  23%|██▎ | 14/62 [00:47<02:49, 3.53s/it]
134
  24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
135
 
136
  24%|██▍ | 15/62 [00:50<02:37, 3.35s/it]
137
  26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
138
 
139
  26%|██▌ | 16/62 [00:52<02:27, 3.21s/it]
140
+
141
  0%| | 0/6 [00:00<?, ?it/s]
142
+
143
  33%|███▎ | 2/6 [00:00<00:00, 8.86it/s]
144
+
145
  50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
146
+
147
  67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
148
+
149
  83%|████████▎ | 5/6 [00:00<00:00, 5.02it/s]
150
+
151
 
152
+
153
 
154
  26%|██▌ | 16/62 [00:54<02:27, 3.21s/it]
155
+
156
+
157
 
158
  27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
159
 
160
  27%|██▋ | 17/62 [00:57<02:39, 3.54s/it]
161
  29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
162
 
163
  29%|██▉ | 18/62 [01:00<02:27, 3.35s/it]
164
  31%|███ | 19/62 [01:03<02:18, 3.22s/it]
165
 
166
  31%|███ | 19/62 [01:03<02:18, 3.22s/it]
167
  32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
168
 
169
  32%|███▏ | 20/62 [01:05<02:11, 3.13s/it]
170
+
171
  0%| | 0/6 [00:00<?, ?it/s]
172
+
173
  33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
174
+
175
  50%|█████ | 3/6 [00:00<00:00, 6.24it/s]
176
+
177
  67%|██████▋ | 4/6 [00:00<00:00, 5.41it/s]
178
+
179
  83%|████████▎ | 5/6 [00:00<00:00, 5.01it/s]
180
+
181
 
182
+
183
 
184
  32%|███▏ | 20/62 [01:07<02:11, 3.13s/it]
185
+
186
+
187
 
188
  34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
189
 
190
  34%|███▍ | 21/62 [01:10<02:22, 3.48s/it]
191
  35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
192
 
193
  35%|███▌ | 22/62 [01:13<02:12, 3.31s/it]
194
  37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
195
 
196
  37%|███▋ | 23/62 [01:16<02:04, 3.19s/it]
197
  39%|███▊ | 24/62 [01:18<01:58, 3.11s/it]
198
 
199
  39%|███▊ | 24/62 [01:19<01:58, 3.11s/it]
200
+
201
  0%| | 0/6 [00:00<?, ?it/s]
202
+
203
  33%|███▎ | 2/6 [00:00<00:00, 8.83it/s]
204
+
205
  50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
206
+
207
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
208
+
209
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
210
+
211
 
212
+
213
 
214
  39%|███▊ | 24/62 [01:20<01:58, 3.11s/it]
215
+
216
+
217
 
218
  40%|████ | 25/62 [01:23<02:06, 3.42s/it]
219
 
220
  40%|████ | 25/62 [01:23<02:06, 3.42s/it]
221
  42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
222
 
223
  42%|████▏ | 26/62 [01:26<01:57, 3.27s/it]
224
  44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
225
 
226
  44%|████▎ | 27/62 [01:29<01:58, 3.39s/it]
227
  45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
228
 
229
  45%|████▌ | 28/62 [01:32<01:50, 3.25s/it]
230
+
231
  0%| | 0/6 [00:00<?, ?it/s]
232
+
233
  33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
234
+
235
  50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
236
+
237
  67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
238
+
239
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
240
+
241
 
242
+
243
 
244
  45%|████▌ | 28/62 [01:34<01:50, 3.25s/it]
245
+
246
+
247
 
248
  47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
249
 
250
  47%|████▋ | 29/62 [01:36<01:57, 3.57s/it]
251
  48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
252
 
253
  48%|████▊ | 30/62 [01:39<01:47, 3.37s/it]
254
  50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
255
 
256
  50%|█████ | 31/62 [01:42<01:40, 3.24s/it]
257
  52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
258
 
259
  52%|█████▏ | 32/62 [01:45<01:34, 3.14s/it]
260
+
261
  0%| | 0/6 [00:00<?, ?it/s]
262
+
263
  33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
264
+
265
  50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
266
+
267
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
268
+
269
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
270
+
271
 
272
+
273
 
274
  52%|█████▏ | 32/62 [01:47<01:34, 3.14s/it]
275
+
276
+
277
 
278
  53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
279
 
280
  53%|█████▎ | 33/62 [01:50<01:41, 3.49s/it]
281
  55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
282
 
283
  55%|█████▍ | 34/62 [01:52<01:32, 3.32s/it]
284
  56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
285
 
286
  56%|█████▋ | 35/62 [01:55<01:26, 3.20s/it]
287
  58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
288
 
289
  58%|█████▊ | 36/62 [01:58<01:20, 3.11s/it]
290
+
291
  0%| | 0/6 [00:00<?, ?it/s]
292
+
293
  33%|███▎ | 2/6 [00:00<00:00, 8.82it/s]
294
+
295
  50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
296
+
297
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
298
+
299
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
300
+
301
 
302
+
303
 
304
  58%|█████▊ | 36/62 [02:00<01:20, 3.11s/it]
305
+
306
+
307
 
308
  60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
309
 
310
  60%|█████▉ | 37/62 [02:03<01:26, 3.47s/it]
311
  61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
312
 
313
  61%|██████▏ | 38/62 [02:05<01:18, 3.26s/it]
314
  63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
315
 
316
  63%|██████▎ | 39/62 [02:08<01:12, 3.15s/it]
317
  65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
318
 
319
  65%|██████▍ | 40/62 [02:12<01:12, 3.32s/it]
320
+
321
  0%| | 0/6 [00:00<?, ?it/s]
322
+
323
  33%|███▎ | 2/6 [00:00<00:00, 8.84it/s]
324
+
325
  50%|█████ | 3/6 [00:00<00:00, 6.23it/s]
326
+
327
  67%|██████▋ | 4/6 [00:00<00:00, 5.40it/s]
328
+
329
  83%|████████▎ | 5/6 [00:00<00:00, 5.00it/s]
330
+
331
 
332
+
333
 
334
  65%|██████▍ | 40/62 [02:13<01:12, 3.32s/it]
335
+
336
+
337
 
338
  66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
339
 
340
  66%|██████▌ | 41/62 [02:16<01:15, 3.61s/it]
341
  68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
342
 
343
  68%|██████▊ | 42/62 [02:19<01:08, 3.40s/it]
344
  69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
345
 
346
  69%|██████▉ | 43/62 [02:22<01:01, 3.26s/it]
347
  71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
348
 
349
  71%|███████ | 44/62 [02:25<00:56, 3.16s/it]
350
+
351
  0%| | 0/6 [00:00<?, ?it/s]
352
+
353
  33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
354
+
355
  50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
356
+
357
  67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
358
+
359
  83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
360
+
361
 
362
+
363
 
364
  71%|███████ | 44/62 [02:26<00:56, 3.16s/it]
365
+
366
+
367
 
368
  73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
369
 
370
  73%|███████▎ | 45/62 [02:29<00:59, 3.50s/it]
371
  74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
372
 
373
  74%|███████▍ | 46/62 [02:32<00:53, 3.33s/it]
374
  76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
375
 
376
  76%|███████▌ | 47/62 [02:35<00:48, 3.20s/it]
377
  77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
378
 
379
  77%|███████▋ | 48/62 [02:38<00:43, 3.12s/it]
380
+
381
  0%| | 0/6 [00:00<?, ?it/s]
382
+
383
  33%|███▎ | 2/6 [00:00<00:00, 8.80it/s]
384
+
385
  50%|█████ | 3/6 [00:00<00:00, 6.21it/s]
386
+
387
  67%|██████▋ | 4/6 [00:00<00:00, 5.38it/s]
388
+
389
  83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
390
+
391
 
392
+
393
 
394
  77%|███████▋ | 48/62 [02:39<00:43, 3.12s/it]
395
+
396
+
397
 
398
  79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
399
 
400
  79%|███████▉ | 49/62 [02:42<00:45, 3.48s/it]
401
  81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
402
 
403
  81%|████████ | 50/62 [02:45<00:39, 3.26s/it]
404
  82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
405
 
406
  82%|████████▏ | 51/62 [02:48<00:34, 3.16s/it]
407
  84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
408
 
409
  84%|████████▍ | 52/62 [02:51<00:30, 3.09s/it]
410
+
411
  0%| | 0/6 [00:00<?, ?it/s]
412
+
413
  33%|███▎ | 2/6 [00:00<00:00, 8.78it/s]
414
+
415
  50%|█████ | 3/6 [00:00<00:00, 6.18it/s]
416
+
417
  67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
418
+
419
  83%|████████▎ | 5/6 [00:00<00:00, 4.98it/s]
420
+
421
 
422
+
423
 
424
  84%|████████▍ | 52/62 [02:52<00:30, 3.09s/it]
425
+
426
+
427
 
428
  85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
429
 
430
  85%|████████▌ | 53/62 [02:56<00:33, 3.74s/it]
431
  87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
432
 
433
  87%|████████▋ | 54/62 [02:59<00:27, 3.49s/it]
434
  89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
435
 
436
  89%|████████▊ | 55/62 [03:02<00:23, 3.32s/it]
437
  90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
438
 
439
  90%|█████████ | 56/62 [03:05<00:19, 3.20s/it]
440
+
441
  0%| | 0/6 [00:00<?, ?it/s]
442
+
443
  33%|███▎ | 2/6 [00:00<00:00, 8.79it/s]
444
+
445
  50%|█████ | 3/6 [00:00<00:00, 6.19it/s]
446
+
447
  67%|██████▋ | 4/6 [00:00<00:00, 5.37it/s]
448
+
449
  83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
450
+
451
 
452
+
453
 
454
  90%|█████████ | 56/62 [03:06<00:19, 3.20s/it]
455
+
456
+
457
 
458
  92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
459
 
460
  92%|█████████▏| 57/62 [03:09<00:17, 3.53s/it]
461
  94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
462
 
463
  94%|█████████▎| 58/62 [03:12<00:13, 3.35s/it]
464
  95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
465
 
466
  95%|█████████▌| 59/62 [03:15<00:09, 3.22s/it]
467
  97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
468
 
469
  97%|█████████▋| 60/62 [03:18<00:06, 3.13s/it]
470
+
471
  0%| | 0/6 [00:00<?, ?it/s]
472
+
473
  33%|███▎ | 2/6 [00:00<00:00, 8.81it/s]
474
+
475
  50%|█████ | 3/6 [00:00<00:00, 6.22it/s]
476
+
477
  67%|██████▋ | 4/6 [00:00<00:00, 5.39it/s]
478
+
479
  83%|████████▎ | 5/6 [00:00<00:00, 4.99it/s]
480
+
481
 
482
+
483
 
484
  97%|█████████▋| 60/62 [03:19<00:06, 3.13s/it]
485
+
486
+
487
 
488
  98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
489
 
490
  98%|█████████▊| 61/62 [03:22<00:03, 3.49s/it]
491
 
492
 
dmog/job.output ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Your results will be stored in: /mnt/scratch/users/dhd2000/ft14/dmog/axolotl-test-outputs
2
+ Executing job commands, current working directory is /mnt/scratch/users/dhd2000/ft14/dmog
3
+ /mnt/scratch/users/dhd2000/ft14
4
+ [2024-04-09 08:29:06,912] [INFO] [datasets.<module>:58] [PID:30736] PyTorch version 2.1.2 available.
5
+ [2024-04-09 08:29:08,482] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)
6
+ [2024-04-09 08:29:10,330] [INFO] [axolotl.normalize_config:178] [PID:30736] [RANK:0] GPU memory usage baseline: 0.000GB (+0.640GB misc)
7
+ dP dP dP
8
+ 88 88 88
9
+ .d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88
10
+ 88' `88 `8bd8' 88' `88 88 88' `88 88 88
11
+ 88. .88 .d88b. 88. .88 88 88. .88 88 88
12
+ `88888P8 dP' `dP `88888P' dP `88888P' dP dP
13
+
14
+
15
+
16
+ [2024-04-09 08:29:10,707] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 / </s>
17
+ [2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 / <s>
18
+ [2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 / </s>
19
+ [2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 / <unk>
20
+ [2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.
21
+ [2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:191] [PID:30736] [RANK:0] Unable to find prepared dataset in last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e
22
+ [2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:192] [PID:30736] [RANK:0] Loading raw datasets...
23
+ [2024-04-09 08:29:10,709] [WARNING] [axolotl.load_tokenized_prepared_datasets:194] [PID:30736] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset.
24
+ [2024-04-09 08:29:10,709] [INFO] [axolotl.load_tokenized_prepared_datasets:201] [PID:30736] [RANK:0] No seed provided, using default seed of 42
25
+ [2024-04-09 08:29:17,092] [INFO] [axolotl.load_tokenized_prepared_datasets:414] [PID:30736] [RANK:0] merging datasets
26
+ [2024-04-09 08:29:17,096] [INFO] [axolotl.log:61] [PID:30736] [RANK:0] dropping attention_mask column
27
+ [2024-04-09 08:29:18,698] [INFO] [axolotl.load_tokenized_prepared_datasets:424] [PID:30736] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e
28
+ [2024-04-09 08:29:18,755] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_tokens: 21468
29
+ [2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] `total_supervised_tokens: 259`
30
+ [2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_steps: 62
31
+ [2024-04-09 08:29:18,756] [INFO] [axolotl.prepare_dataset:124] [PID:30736] [RANK:0] Maximum number of steps set at 62
32
+ [2024-04-09 08:29:18,759] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading tokenizer... mistralai/Mistral-7B-v0.1
33
+ [2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 / </s>
34
+ [2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 / <s>
35
+ [2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 / </s>
36
+ [2024-04-09 08:29:19,029] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 / <unk>
37
+ [2024-04-09 08:29:19,029] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.
38
+ [2024-04-09 08:29:19,029] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading model and peft_config...
39
+ [2024-04-09 08:29:35,702] [INFO] [axolotl.load_model:660] [PID:30736] [RANK:0] GPU memory usage after model load: 4.342GB (+0.138GB cache, +0.942GB misc)
40
+ [2024-04-09 08:29:35,711] [INFO] [axolotl.load_model:701] [PID:30736] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training
41
+ [2024-04-09 08:29:35,713] [INFO] [axolotl.load_model:710] [PID:30736] [RANK:0] converting modules to torch.bfloat16 for flash attention
42
+ [2024-04-09 08:29:35,715] [INFO] [axolotl.load_lora:825] [PID:30736] [RANK:0] found linear modules: ['up_proj', 'q_proj', 'k_proj', 'gate_proj', 'down_proj', 'o_proj', 'v_proj']
43
+ trainable params: 83,886,080 || all params: 7,325,618,176 || trainable%: 1.1451058188485088
44
+ [2024-04-09 08:29:36,348] [INFO] [axolotl.load_model:750] [PID:30736] [RANK:0] GPU memory usage after adapters: 4.670GB (+0.935GB cache, +0.942GB misc)
45
+ [2024-04-09 08:29:36,446] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Pre-saving adapter config to ./qlora-out
46
+ [2024-04-09 08:29:36,459] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Starting trainer...
47
+ {'loss': 6.6367, 'grad_norm': 102.28898620605469, 'learning_rate': 2e-05, 'epoch': 0.08}
48
+ {'eval_loss': 7.300913333892822, 'eval_runtime': 1.3523, 'eval_samples_per_second': 8.873, 'eval_steps_per_second': 4.437, 'epoch': 0.08}
49
+ [2024-04-09 08:29:44,573] [INFO] [axolotl.callbacks.on_step_end:123] [PID:30736] [RANK:0] GPU memory usage while training: 4.843GB (+1.177GB cache, +0.965GB misc)
50
+ {'loss': 7.0616, 'grad_norm': 103.4541015625, 'learning_rate': 4e-05, 'epoch': 0.16}
51
+ {'loss': 4.686, 'grad_norm': 67.47515869140625, 'learning_rate': 6e-05, 'epoch': 0.24}
52
+ {'loss': 2.3866, 'grad_norm': 72.36919403076172, 'learning_rate': 8e-05, 'epoch': 0.32}
53
+ {'eval_loss': 0.7137572169303894, 'eval_runtime': 1.3532, 'eval_samples_per_second': 8.868, 'eval_steps_per_second': 4.434, 'epoch': 0.32}
54
+ {'loss': 0.6844, 'grad_norm': 16.83085060119629, 'learning_rate': 0.0001, 'epoch': 0.4}
55
+ {'loss': 0.914, 'grad_norm': 25.897714614868164, 'learning_rate': 0.00012, 'epoch': 0.48}
56
+ {'loss': 0.63, 'grad_norm': 18.89151382446289, 'learning_rate': 0.00014, 'epoch': 0.56}
57
+ {'loss': 0.948, 'grad_norm': 27.15555763244629, 'learning_rate': 0.00016, 'epoch': 0.64}
58
+ {'eval_loss': 1.0445994138717651, 'eval_runtime': 1.356, 'eval_samples_per_second': 8.85, 'eval_steps_per_second': 4.425, 'epoch': 0.64}
59
+ {'loss': 1.0285, 'grad_norm': 20.812381744384766, 'learning_rate': 0.00018, 'epoch': 0.72}
60
+ {'loss': 1.3756, 'grad_norm': 56.3886604309082, 'learning_rate': 0.0002, 'epoch': 0.8}
61
+ {'loss': 0.5178, 'grad_norm': 6.24803352355957, 'learning_rate': 0.00019981755542233177, 'epoch': 0.88}
62
+ {'loss': 0.6822, 'grad_norm': 8.379430770874023, 'learning_rate': 0.0001992708874098054, 'epoch': 0.96}
63
+ {'eval_loss': 1.3959709405899048, 'eval_runtime': 1.3583, 'eval_samples_per_second': 8.835, 'eval_steps_per_second': 4.417, 'epoch': 0.96}
64
+ {'loss': 1.3762, 'grad_norm': 20.744348526000977, 'learning_rate': 0.00019836199069471437, 'epoch': 1.04}
65
+ {'loss': 0.5248, 'grad_norm': 4.800480842590332, 'learning_rate': 0.0001970941817426052, 'epoch': 1.12}
66
+ {'loss': 0.8094, 'grad_norm': 11.284302711486816, 'learning_rate': 0.00019547208665085457, 'epoch': 1.2}
67
+ {'loss': 0.5222, 'grad_norm': 5.787976264953613, 'learning_rate': 0.0001935016242685415, 'epoch': 1.28}
68
+ {'eval_loss': 0.9023411870002747, 'eval_runtime': 1.3623, 'eval_samples_per_second': 8.808, 'eval_steps_per_second': 4.404, 'epoch': 1.28}
69
+ {'loss': 0.8027, 'grad_norm': 21.48629379272461, 'learning_rate': 0.00019118998459920902, 'epoch': 1.36}
70
+ {'loss': 1.7772, 'grad_norm': 38.0982666015625, 'learning_rate': 0.000188545602565321, 'epoch': 1.44}
71
+ {'loss': 0.7737, 'grad_norm': 10.824837684631348, 'learning_rate': 0.00018557812723014476, 'epoch': 1.52}
72
+ {'loss': 0.534, 'grad_norm': 9.1353120803833, 'learning_rate': 0.00018229838658936564, 'epoch': 1.6}
73
+ {'eval_loss': 0.4847445785999298, 'eval_runtime': 1.3637, 'eval_samples_per_second': 8.799, 'eval_steps_per_second': 4.4, 'epoch': 1.6}
74
+ {'loss': 0.3201, 'grad_norm': 3.8411033153533936, 'learning_rate': 0.00017871834806090501, 'epoch': 1.68}
75
+ {'loss': 2.2541, 'grad_norm': 23.888507843017578, 'learning_rate': 0.00017485107481711012, 'epoch': 1.76}
76
+ {'loss': 0.8177, 'grad_norm': 8.5956392288208, 'learning_rate': 0.00017071067811865476, 'epoch': 1.84}
77
+ {'loss': 0.4624, 'grad_norm': 3.825141191482544, 'learning_rate': 0.00016631226582407952, 'epoch': 1.92}
78
+ {'eval_loss': 0.5740255117416382, 'eval_runtime': 1.3655, 'eval_samples_per_second': 8.788, 'eval_steps_per_second': 4.394, 'epoch': 1.92}
79
+ {'loss': 0.3714, 'grad_norm': 3.558993101119995, 'learning_rate': 0.00016167188726285434, 'epoch': 2.0}
80
+ {'loss': 0.6562, 'grad_norm': 11.759211540222168, 'learning_rate': 0.00015680647467311557, 'epoch': 2.08}
81
+ {'loss': 1.5141, 'grad_norm': 96.2179183959961, 'learning_rate': 0.00015173378141776568, 'epoch': 2.16}
82
+ {'loss': 0.7753, 'grad_norm': 31.022045135498047, 'learning_rate': 0.00014647231720437686, 'epoch': 2.24}
83
+ {'eval_loss': 0.3771994113922119, 'eval_runtime': 1.3676, 'eval_samples_per_second': 8.775, 'eval_steps_per_second': 4.387, 'epoch': 2.24}
84
+ {'loss': 0.2649, 'grad_norm': 3.5004501342773438, 'learning_rate': 0.0001410412805452757, 'epoch': 2.32}
85
+ {'loss': 0.171, 'grad_norm': 5.16464376449585, 'learning_rate': 0.00013546048870425356, 'epoch': 2.4}
86
+ {'loss': 0.9172, 'grad_norm': 25.634010314941406, 'learning_rate': 0.00012975030538552032, 'epoch': 2.48}
87
+ {'loss': 0.3324, 'grad_norm': 7.102908134460449, 'learning_rate': 0.0001239315664287558, 'epoch': 2.56}
88
+ {'eval_loss': 0.29374203085899353, 'eval_runtime': 1.3678, 'eval_samples_per_second': 8.773, 'eval_steps_per_second': 4.387, 'epoch': 2.56}
89
+ {'loss': 0.4932, 'grad_norm': 6.236325263977051, 'learning_rate': 0.0001180255037813906, 'epoch': 2.64}
90
+ {'loss': 0.1284, 'grad_norm': 4.445058345794678, 'learning_rate': 0.0001120536680255323, 'epoch': 2.72}
91
+ {'loss': 0.1547, 'grad_norm': 6.94170618057251, 'learning_rate': 0.00010603784974222861, 'epoch': 2.8}
92
+ {'loss': 0.1973, 'grad_norm': 5.656033039093018, 'learning_rate': 0.0001, 'epoch': 2.88}
93
+ {'eval_loss': 0.5674905180931091, 'eval_runtime': 1.3681, 'eval_samples_per_second': 8.771, 'eval_steps_per_second': 4.386, 'epoch': 2.88}
94
+ {'loss': 0.4884, 'grad_norm': 18.19667625427246, 'learning_rate': 9.396215025777139e-05, 'epoch': 2.96}
95
+ {'loss': 0.5526, 'grad_norm': 17.964893341064453, 'learning_rate': 8.79463319744677e-05, 'epoch': 3.04}
96
+ {'loss': 0.2116, 'grad_norm': 5.015590190887451, 'learning_rate': 8.197449621860943e-05, 'epoch': 3.12}
97
+ {'loss': 0.0843, 'grad_norm': 5.6883225440979, 'learning_rate': 7.606843357124426e-05, 'epoch': 3.2}
98
+ {'eval_loss': 0.2360386848449707, 'eval_runtime': 1.3667, 'eval_samples_per_second': 8.78, 'eval_steps_per_second': 4.39, 'epoch': 3.2}
99
+ {'loss': 0.1158, 'grad_norm': 6.636446475982666, 'learning_rate': 7.024969461447972e-05, 'epoch': 3.28}
100
+ {'loss': 0.2755, 'grad_norm': 4.405576229095459, 'learning_rate': 6.453951129574644e-05, 'epoch': 3.36}
101
+ {'loss': 0.0186, 'grad_norm': 1.6179524660110474, 'learning_rate': 5.8958719454724346e-05, 'epoch': 3.44}
102
+ {'loss': 0.3836, 'grad_norm': 8.783114433288574, 'learning_rate': 5.3527682795623146e-05, 'epoch': 3.52}
103
+ {'eval_loss': 0.13969357311725616, 'eval_runtime': 1.3687, 'eval_samples_per_second': 8.767, 'eval_steps_per_second': 4.384, 'epoch': 3.52}
104
+ {'loss': 0.0141, 'grad_norm': 0.8835445046424866, 'learning_rate': 4.826621858223431e-05, 'epoch': 3.6}
105
+ {'loss': 0.6196, 'grad_norm': 12.678099632263184, 'learning_rate': 4.3193525326884435e-05, 'epoch': 3.68}
106
+ {'loss': 0.0948, 'grad_norm': 5.320870876312256, 'learning_rate': 3.832811273714569e-05, 'epoch': 3.76}
107
+ {'loss': 0.0449, 'grad_norm': 2.7501108646392822, 'learning_rate': 3.36877341759205e-05, 'epoch': 3.84}
108
+ {'eval_loss': 0.2801015079021454, 'eval_runtime': 1.3706, 'eval_samples_per_second': 8.755, 'eval_steps_per_second': 4.378, 'epoch': 3.84}
109
+ {'loss': 0.3026, 'grad_norm': 4.41072940826416, 'learning_rate': 2.9289321881345254e-05, 'epoch': 3.92}
110
+ {'loss': 0.0152, 'grad_norm': 1.2105910778045654, 'learning_rate': 2.514892518288988e-05, 'epoch': 4.0}
111
+ {'loss': 0.0629, 'grad_norm': 4.502895355224609, 'learning_rate': 2.1281651939094992e-05, 'epoch': 4.08}
112
+ {'loss': 0.2246, 'grad_norm': 6.058006286621094, 'learning_rate': 1.7701613410634365e-05, 'epoch': 4.16}
113
+ {'eval_loss': 0.19463467597961426, 'eval_runtime': 1.3725, 'eval_samples_per_second': 8.743, 'eval_steps_per_second': 4.372, 'epoch': 4.16}
114
+ {'loss': 0.0093, 'grad_norm': 0.5118169784545898, 'learning_rate': 1.442187276985526e-05, 'epoch': 4.24}
115
+ {'loss': 0.0148, 'grad_norm': 0.8497004508972168, 'learning_rate': 1.1454397434679021e-05, 'epoch': 4.32}
116
+ {'loss': 0.0392, 'grad_norm': 1.752151608467102, 'learning_rate': 8.810015400790994e-06, 'epoch': 4.4}
117
+ {'loss': 0.229, 'grad_norm': 3.6673429012298584, 'learning_rate': 6.498375731458528e-06, 'epoch': 4.48}
118
+ {'eval_loss': 0.16181980073451996, 'eval_runtime': 1.3705, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 'epoch': 4.48}
119
+ {'loss': 0.1722, 'grad_norm': 2.9522616863250732, 'learning_rate': 4.527913349145441e-06, 'epoch': 4.56}
120
+ {'loss': 0.0295, 'grad_norm': 1.5037487745285034, 'learning_rate': 2.905818257394799e-06, 'epoch': 4.64}
121
+ {'loss': 0.03, 'grad_norm': 1.4181660413742065, 'learning_rate': 1.6380093052856483e-06, 'epoch': 4.72}
122
+ {'loss': 0.3073, 'grad_norm': 9.207091331481934, 'learning_rate': 7.291125901946027e-07, 'epoch': 4.8}
123
+ {'eval_loss': 0.14654164016246796, 'eval_runtime': 1.3704, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 'epoch': 4.8}
124
+ {'loss': 0.032, 'grad_norm': 1.5023337602615356, 'learning_rate': 1.824445776682504e-07, 'epoch': 4.88}
125
+ {'loss': 0.1144, 'grad_norm': 2.882874011993408, 'learning_rate': 0.0, 'epoch': 4.96}
126
+ {'train_runtime': 206.4235, 'train_samples_per_second': 2.403, 'train_steps_per_second': 0.3, 'train_loss': 0.7901421915739775, 'epoch': 4.96}
127
+ [2024-04-09 08:33:03,093] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Training Completed!!! Saving pre-trained model to ./qlora-out
128
+ (PeftModelForCausalLM( (base_model): LoraModel( (model): MistralForCausalLM( (model): MistralModel( (embed_tokens): Embedding(32000, 4096) (layers): ModuleList( (0-31): 32 x MistralDecoderLayer( (self_attn): MistralFlashAttention2( (q_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (k_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=1024, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (v_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=1024, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (o_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (rotary_emb): MistralRotaryEmbedding() ) (mlp): MistralMLP( 
(gate_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=14336, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (up_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=4096, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=14336, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (down_proj): lora.Linear4bit( (base_layer): Linear4bit(in_features=14336, out_features=4096, bias=False) (lora_dropout): ModuleDict( (default): Dropout(p=0.05, inplace=False) ) (lora_A): ModuleDict( (default): Linear(in_features=14336, out_features=32, bias=False) ) (lora_B): ModuleDict( (default): Linear(in_features=32, out_features=4096, bias=False) ) (lora_embedding_A): ParameterDict() (lora_embedding_B): ParameterDict() ) (act_fn): SiLU() ) (input_layernorm): MistralRMSNorm() (post_attention_layernorm): MistralRMSNorm() ) ) (norm): MistralRMSNorm() ) (lm_head): Linear(in_features=4096, out_features=32000, bias=False) ) ) ), LlamaTokenizer(name_or_path='mistralai/Mistral-7B-v0.1', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={ 0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), 1: AddedToken("<s>", rstrip=False, lstrip=False, 
single_word=False, normalized=False, special=True), 2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), })
129
+
130
+ End of job
131
+ Output file has been generated, please check /mnt/scratch/users/dhd2000/ft14/dmog/axolotl-test-outputs/test.output
finetune-test.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# finetune-test.py — evaluate a QLoRA-finetuned Mistral model on a JSONL
# traffic-classification dataset and report confusion-matrix counts.
#
# Usage: python finetune-test.py <jsonl_file>
# Each JSONL line must contain an "input" field (traffic-flow description)
# and an "output" field (expected label text).

# Import the necessary libraries
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

import json
import sys

# Check if the correct number of arguments are provided
if len(sys.argv) != 2:
    print("Usage: python finetune.py <jsonl_file>")
    sys.exit(1)

# Get the file path from the command-line argument
jsonl_file_path = sys.argv[1]

# Load the base model and tokenizer, then attach the LoRA adapter.
base_model = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
base_model = AutoModelForCausalLM.from_pretrained(base_model)

ft_model = PeftModel.from_pretrained(base_model, "./qlora-out")
# ft_model = ft_model.merge_and_unload()
ft_model.eval()

# Set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ft_model.to(device)

# Read the JSONL file and score each example.
with open(jsonl_file_path, "r") as f:
    tp, tn, fp, fn = 0, 0, 0, 0
    for line in f:
        data = json.loads(line)
        user_in = data["input"]
        user_input = f"[INST] ###instruction: Check if the given traffic flow is normal or of an attacker or a victim\n###input: {user_in}\n#output: [/INST]"
        encodings = tokenizer(user_input, return_tensors="pt", padding=True).to(device)
        input_ids = encodings["input_ids"]
        attention_mask = encodings["attention_mask"]

        # Sampling with low temperature keeps generations near-deterministic
        # but not exactly reproducible across runs.
        output_ids = ft_model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=1000, num_return_sequences=1, do_sample=True, temperature=0.1, top_p=0.9)

        # Strip the prompt tokens; keep only the newly generated tail.
        generated_ids = output_ids[0, input_ids.shape[-1]:]

        # Decode the output
        response = tokenizer.decode(generated_ids, skip_special_tokens=True).lower()

        # Tally confusion-matrix counts. "Positive" here means the model did
        # NOT say "normal" (i.e. flagged attacker/victim traffic).
        # NOTE(review): the fp/fn assignment looks swapped for that convention
        # (predicting "normal" on a mismatch is usually a false NEGATIVE) —
        # preserved as-is to keep reported counts comparable; confirm intent.
        if "normal" not in response and data["output"] == response:
            tp += 1
        elif "normal" in response and data["output"] == response:
            tn += 1
        elif "normal" in response and data["output"] != response:
            fp += 1
        elif "normal" not in response and data["output"] != response:
            fn += 1
        else:
            # Unreachable in practice: the four branches above exhaust every
            # combination of membership and equality. Kept as a safety net.
            # Bug fix: original read data[output] (undefined name `output`),
            # which would raise NameError if this branch were ever reached.
            print(f"Error: {response}, {data['output']}")
            print(f"User input: {user_in}")
            print(f"Generated response: {response}")
            print(f"Expected response: {data['output']}")
            print()

print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a8a6b98277d114990fb441a27d2f777773005e6b7cf57a0ec219fe3bae40b1
3
+ size 259336
last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "input_ids": {
6
+ "feature": {
7
+ "dtype": "int32",
8
+ "_type": "Value"
9
+ },
10
+ "_type": "Sequence"
11
+ },
12
+ "labels": {
13
+ "feature": {
14
+ "dtype": "int64",
15
+ "_type": "Value"
16
+ },
17
+ "_type": "Sequence"
18
+ }
19
+ },
20
+ "homepage": "",
21
+ "license": ""
22
+ }
last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "992b9317aa372e8e",
8
+ "_format_columns": [
9
+ "input_ids",
10
+ "labels"
11
+ ],
12
+ "_format_kwargs": {},
13
+ "_format_type": null,
14
+ "_output_all_columns": false,
15
+ "_split": null
16
+ }
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ artifact_uri: file:///mnt/scratch/users/dhd2000/ft14/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/artifacts
2
+ end_time: 1712647983089
3
+ entry_point_name: ''
4
+ experiment_id: '0'
5
+ lifecycle_stage: active
6
+ run_id: 7e75ece8e18e485db64e4e2d9196e738
7
+ run_name: ./qlora-out
8
+ run_uuid: 7e75ece8e18e485db64e4e2d9196e738
9
+ source_name: ''
10
+ source_type: 4
11
+ source_version: ''
12
+ start_time: 1712647776681
13
+ status: 3
14
+ tags: []
15
+ user_id: dhd2000
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647780322 0.08 1
2
+ 1712647781687 0.08 1
3
+ 1712647784574 0.16 2
4
+ 1712647787465 0.24 3
5
+ 1712647790359 0.32 4
6
+ 1712647791728 0.32 4
7
+ 1712647794625 0.4 5
8
+ 1712647797520 0.48 6
9
+ 1712647800416 0.56 7
10
+ 1712647803312 0.64 8
11
+ 1712647804683 0.64 8
12
+ 1712647807578 0.72 9
13
+ 1712647810474 0.8 10
14
+ 1712647813372 0.88 11
15
+ 1712647816270 0.96 12
16
+ 1712647817642 0.96 12
17
+ 1712647820386 1.04 13
18
+ 1712647824221 1.12 14
19
+ 1712647827138 1.2 15
20
+ 1712647830042 1.28 16
21
+ 1712647831420 1.28 16
22
+ 1712647834333 1.36 17
23
+ 1712647837242 1.44 18
24
+ 1712647840147 1.52 19
25
+ 1712647843070 1.6 20
26
+ 1712647844448 1.6 20
27
+ 1712647847363 1.68 21
28
+ 1712647850290 1.76 22
29
+ 1712647853203 1.84 23
30
+ 1712647856116 1.92 24
31
+ 1712647857496 1.92 24
32
+ 1712647860252 2.0 25
33
+ 1712647863163 2.08 26
34
+ 1712647866855 2.16 27
35
+ 1712647869769 2.24 28
36
+ 1712647871150 2.24 28
37
+ 1712647874087 2.32 29
38
+ 1712647877006 2.4 30
39
+ 1712647879921 2.48 31
40
+ 1712647882836 2.56 32
41
+ 1712647884219 2.56 32
42
+ 1712647887139 2.64 33
43
+ 1712647890053 2.72 34
44
+ 1712647892967 2.8 35
45
+ 1712647895882 2.88 36
46
+ 1712647897265 2.88 36
47
+ 1712647900187 2.96 37
48
+ 1712647902945 3.04 38
49
+ 1712647905861 3.12 39
50
+ 1712647909552 3.2 40
51
+ 1712647910933 3.2 40
52
+ 1712647913855 3.28 41
53
+ 1712647916777 3.36 42
54
+ 1712647919694 3.44 43
55
+ 1712647922609 3.52 44
56
+ 1712647923992 3.52 44
57
+ 1712647926911 3.6 45
58
+ 1712647929833 3.68 46
59
+ 1712647932754 3.76 47
60
+ 1712647935680 3.84 48
61
+ 1712647937065 3.84 48
62
+ 1712647939993 3.92 49
63
+ 1712647942758 4.0 50
64
+ 1712647945681 4.08 51
65
+ 1712647948606 4.16 52
66
+ 1712647949993 4.16 52
67
+ 1712647953856 4.24 53
68
+ 1712647956779 4.32 54
69
+ 1712647959701 4.4 55
70
+ 1712647962622 4.48 56
71
+ 1712647964007 4.48 56
72
+ 1712647966930 4.56 57
73
+ 1712647969855 4.64 58
74
+ 1712647972779 4.72 59
75
+ 1712647975702 4.8 60
76
+ 1712647977087 4.8 60
77
+ 1712647980013 4.88 61
78
+ 1712647982936 4.96 62
79
+ 1712647983084 4.96 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647781687 7.300913333892822 1
2
+ 1712647791728 0.7137572169303894 4
3
+ 1712647804683 1.0445994138717651 8
4
+ 1712647817642 1.3959709405899048 12
5
+ 1712647831420 0.9023411870002747 16
6
+ 1712647844448 0.4847445785999298 20
7
+ 1712647857496 0.5740255117416382 24
8
+ 1712647871150 0.3771994113922119 28
9
+ 1712647884219 0.29374203085899353 32
10
+ 1712647897265 0.5674905180931091 36
11
+ 1712647910933 0.2360386848449707 40
12
+ 1712647923992 0.13969357311725616 44
13
+ 1712647937065 0.2801015079021454 48
14
+ 1712647949993 0.19463467597961426 52
15
+ 1712647964007 0.16181980073451996 56
16
+ 1712647977087 0.14654164016246796 60
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647781687 1.3523 1
2
+ 1712647791728 1.3532 4
3
+ 1712647804683 1.356 8
4
+ 1712647817642 1.3583 12
5
+ 1712647831420 1.3623 16
6
+ 1712647844448 1.3637 20
7
+ 1712647857496 1.3655 24
8
+ 1712647871150 1.3676 28
9
+ 1712647884219 1.3678 32
10
+ 1712647897265 1.3681 36
11
+ 1712647910933 1.3667 40
12
+ 1712647923992 1.3687 44
13
+ 1712647937065 1.3706 48
14
+ 1712647949993 1.3725 52
15
+ 1712647964007 1.3705 56
16
+ 1712647977087 1.3704 60
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647781687 8.873 1
2
+ 1712647791728 8.868 4
3
+ 1712647804683 8.85 8
4
+ 1712647817642 8.835 12
5
+ 1712647831420 8.808 16
6
+ 1712647844448 8.799 20
7
+ 1712647857496 8.788 24
8
+ 1712647871150 8.775 28
9
+ 1712647884219 8.773 32
10
+ 1712647897265 8.771 36
11
+ 1712647910933 8.78 40
12
+ 1712647923992 8.767 44
13
+ 1712647937065 8.755 48
14
+ 1712647949993 8.743 52
15
+ 1712647964007 8.756 56
16
+ 1712647977087 8.756 60
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647781687 4.437 1
2
+ 1712647791728 4.434 4
3
+ 1712647804683 4.425 8
4
+ 1712647817642 4.417 12
5
+ 1712647831420 4.404 16
6
+ 1712647844448 4.4 20
7
+ 1712647857496 4.394 24
8
+ 1712647871150 4.387 28
9
+ 1712647884219 4.387 32
10
+ 1712647897265 4.386 36
11
+ 1712647910933 4.39 40
12
+ 1712647923992 4.384 44
13
+ 1712647937065 4.378 48
14
+ 1712647949993 4.372 52
15
+ 1712647964007 4.378 56
16
+ 1712647977087 4.378 60
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647780322 102.28898620605469 1
2
+ 1712647784574 103.4541015625 2
3
+ 1712647787465 67.47515869140625 3
4
+ 1712647790359 72.36919403076172 4
5
+ 1712647794625 16.83085060119629 5
6
+ 1712647797520 25.897714614868164 6
7
+ 1712647800416 18.89151382446289 7
8
+ 1712647803312 27.15555763244629 8
9
+ 1712647807578 20.812381744384766 9
10
+ 1712647810474 56.3886604309082 10
11
+ 1712647813372 6.24803352355957 11
12
+ 1712647816270 8.379430770874023 12
13
+ 1712647820386 20.744348526000977 13
14
+ 1712647824221 4.800480842590332 14
15
+ 1712647827138 11.284302711486816 15
16
+ 1712647830042 5.787976264953613 16
17
+ 1712647834333 21.48629379272461 17
18
+ 1712647837242 38.0982666015625 18
19
+ 1712647840147 10.824837684631348 19
20
+ 1712647843070 9.1353120803833 20
21
+ 1712647847363 3.8411033153533936 21
22
+ 1712647850290 23.888507843017578 22
23
+ 1712647853203 8.5956392288208 23
24
+ 1712647856116 3.825141191482544 24
25
+ 1712647860252 3.558993101119995 25
26
+ 1712647863163 11.759211540222168 26
27
+ 1712647866855 96.2179183959961 27
28
+ 1712647869769 31.022045135498047 28
29
+ 1712647874087 3.5004501342773438 29
30
+ 1712647877006 5.16464376449585 30
31
+ 1712647879921 25.634010314941406 31
32
+ 1712647882836 7.102908134460449 32
33
+ 1712647887139 6.236325263977051 33
34
+ 1712647890053 4.445058345794678 34
35
+ 1712647892967 6.94170618057251 35
36
+ 1712647895882 5.656033039093018 36
37
+ 1712647900187 18.19667625427246 37
38
+ 1712647902945 17.964893341064453 38
39
+ 1712647905861 5.015590190887451 39
40
+ 1712647909552 5.6883225440979 40
41
+ 1712647913855 6.636446475982666 41
42
+ 1712647916777 4.405576229095459 42
43
+ 1712647919694 1.6179524660110474 43
44
+ 1712647922609 8.783114433288574 44
45
+ 1712647926911 0.8835445046424866 45
46
+ 1712647929833 12.678099632263184 46
47
+ 1712647932754 5.320870876312256 47
48
+ 1712647935680 2.7501108646392822 48
49
+ 1712647939993 4.41072940826416 49
50
+ 1712647942758 1.2105910778045654 50
51
+ 1712647945681 4.502895355224609 51
52
+ 1712647948606 6.058006286621094 52
53
+ 1712647953856 0.5118169784545898 53
54
+ 1712647956779 0.8497004508972168 54
55
+ 1712647959701 1.752151608467102 55
56
+ 1712647962622 3.6673429012298584 56
57
+ 1712647966930 2.9522616863250732 57
58
+ 1712647969855 1.5037487745285034 58
59
+ 1712647972779 1.4181660413742065 59
60
+ 1712647975702 9.207091331481934 60
61
+ 1712647980013 1.5023337602615356 61
62
+ 1712647982936 2.882874011993408 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647780322 2e-05 1
2
+ 1712647784574 4e-05 2
3
+ 1712647787465 6e-05 3
4
+ 1712647790359 8e-05 4
5
+ 1712647794625 0.0001 5
6
+ 1712647797520 0.00012 6
7
+ 1712647800416 0.00014 7
8
+ 1712647803312 0.00016 8
9
+ 1712647807578 0.00018 9
10
+ 1712647810474 0.0002 10
11
+ 1712647813372 0.00019981755542233177 11
12
+ 1712647816270 0.0001992708874098054 12
13
+ 1712647820386 0.00019836199069471437 13
14
+ 1712647824221 0.0001970941817426052 14
15
+ 1712647827138 0.00019547208665085457 15
16
+ 1712647830042 0.0001935016242685415 16
17
+ 1712647834333 0.00019118998459920902 17
18
+ 1712647837242 0.000188545602565321 18
19
+ 1712647840147 0.00018557812723014476 19
20
+ 1712647843070 0.00018229838658936564 20
21
+ 1712647847363 0.00017871834806090501 21
22
+ 1712647850290 0.00017485107481711012 22
23
+ 1712647853203 0.00017071067811865476 23
24
+ 1712647856116 0.00016631226582407952 24
25
+ 1712647860252 0.00016167188726285434 25
26
+ 1712647863163 0.00015680647467311557 26
27
+ 1712647866855 0.00015173378141776568 27
28
+ 1712647869769 0.00014647231720437686 28
29
+ 1712647874087 0.0001410412805452757 29
30
+ 1712647877006 0.00013546048870425356 30
31
+ 1712647879921 0.00012975030538552032 31
32
+ 1712647882836 0.0001239315664287558 32
33
+ 1712647887139 0.0001180255037813906 33
34
+ 1712647890053 0.0001120536680255323 34
35
+ 1712647892967 0.00010603784974222861 35
36
+ 1712647895882 0.0001 36
37
+ 1712647900187 9.396215025777139e-05 37
38
+ 1712647902945 8.79463319744677e-05 38
39
+ 1712647905861 8.197449621860943e-05 39
40
+ 1712647909552 7.606843357124426e-05 40
41
+ 1712647913855 7.024969461447972e-05 41
42
+ 1712647916777 6.453951129574644e-05 42
43
+ 1712647919694 5.8958719454724346e-05 43
44
+ 1712647922609 5.3527682795623146e-05 44
45
+ 1712647926911 4.826621858223431e-05 45
46
+ 1712647929833 4.3193525326884435e-05 46
47
+ 1712647932754 3.832811273714569e-05 47
48
+ 1712647935680 3.36877341759205e-05 48
49
+ 1712647939993 2.9289321881345254e-05 49
50
+ 1712647942758 2.514892518288988e-05 50
51
+ 1712647945681 2.1281651939094992e-05 51
52
+ 1712647948606 1.7701613410634365e-05 52
53
+ 1712647953856 1.442187276985526e-05 53
54
+ 1712647956779 1.1454397434679021e-05 54
55
+ 1712647959701 8.810015400790994e-06 55
56
+ 1712647962622 6.498375731458528e-06 56
57
+ 1712647966930 4.527913349145441e-06 57
58
+ 1712647969855 2.905818257394799e-06 58
59
+ 1712647972779 1.6380093052856483e-06 59
60
+ 1712647975702 7.291125901946027e-07 60
61
+ 1712647980013 1.824445776682504e-07 61
62
+ 1712647982936 0.0 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1712647780322 6.6367 1
2
+ 1712647784574 7.0616 2
3
+ 1712647787465 4.686 3
4
+ 1712647790359 2.3866 4
5
+ 1712647794625 0.6844 5
6
+ 1712647797520 0.914 6
7
+ 1712647800416 0.63 7
8
+ 1712647803312 0.948 8
9
+ 1712647807578 1.0285 9
10
+ 1712647810474 1.3756 10
11
+ 1712647813372 0.5178 11
12
+ 1712647816270 0.6822 12
13
+ 1712647820386 1.3762 13
14
+ 1712647824221 0.5248 14
15
+ 1712647827138 0.8094 15
16
+ 1712647830042 0.5222 16
17
+ 1712647834333 0.8027 17
18
+ 1712647837242 1.7772 18
19
+ 1712647840147 0.7737 19
20
+ 1712647843070 0.534 20
21
+ 1712647847363 0.3201 21
22
+ 1712647850290 2.2541 22
23
+ 1712647853203 0.8177 23
24
+ 1712647856116 0.4624 24
25
+ 1712647860252 0.3714 25
26
+ 1712647863163 0.6562 26
27
+ 1712647866855 1.5141 27
28
+ 1712647869769 0.7753 28
29
+ 1712647874087 0.2649 29
30
+ 1712647877006 0.171 30
31
+ 1712647879921 0.9172 31
32
+ 1712647882836 0.3324 32
33
+ 1712647887139 0.4932 33
34
+ 1712647890053 0.1284 34
35
+ 1712647892967 0.1547 35
36
+ 1712647895882 0.1973 36
37
+ 1712647900187 0.4884 37
38
+ 1712647902945 0.5526 38
39
+ 1712647905861 0.2116 39
40
+ 1712647909552 0.0843 40
41
+ 1712647913855 0.1158 41
42
+ 1712647916777 0.2755 42
43
+ 1712647919694 0.0186 43
44
+ 1712647922609 0.3836 44
45
+ 1712647926911 0.0141 45
46
+ 1712647929833 0.6196 46
47
+ 1712647932754 0.0948 47
48
+ 1712647935680 0.0449 48
49
+ 1712647939993 0.3026 49
50
+ 1712647942758 0.0152 50
51
+ 1712647945681 0.0629 51
52
+ 1712647948606 0.2246 52
53
+ 1712647953856 0.0093 53
54
+ 1712647956779 0.0148 54
55
+ 1712647959701 0.0392 55
56
+ 1712647962622 0.229 56
57
+ 1712647966930 0.1722 57
58
+ 1712647969855 0.0295 58
59
+ 1712647972779 0.03 59
60
+ 1712647975702 0.3073 60
61
+ 1712647980013 0.032 61
62
+ 1712647982936 0.1144 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos ADDED
@@ -0,0 +1 @@
 
 
1
+ 1712647983084 5437004879757312.0 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss ADDED
@@ -0,0 +1 @@
 
 
1
+ 1712647983084 0.7901421915739775 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime ADDED
@@ -0,0 +1 @@
 
 
1
+ 1712647983084 206.4235 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second ADDED
@@ -0,0 +1 @@
 
 
1
+ 1712647983084 2.403 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second ADDED
@@ -0,0 +1 @@
 
 
1
+ 1712647983084 0.3 62
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path ADDED
@@ -0,0 +1 @@
 
 
1
+ mistralai/Mistral-7B-v0.1
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config ADDED
@@ -0,0 +1 @@
 
 
1
+ {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor ADDED
@@ -0,0 +1 @@
 
 
1
+ False
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1 ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.9
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2 ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.999
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon ADDED
@@ -0,0 +1 @@
 
 
1
+ 1e-08
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention ADDED
@@ -0,0 +1 @@
 
 
1
+ False
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures ADDED
@@ -0,0 +1 @@
 
 
1
+ ['MistralForCausalLM']
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.0
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size ADDED
@@ -0,0 +1 @@
 
 
1
+ False
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids ADDED
@@ -0,0 +1 @@
 
 
1
+ None
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens ADDED
@@ -0,0 +1 @@
 
 
1
+ None
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset ADDED
@@ -0,0 +1 @@
 
 
1
+ pharaouk/dharma-1/dharma_1_mini.json
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len ADDED
@@ -0,0 +1 @@
 
 
1
+ 2048
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split ADDED
@@ -0,0 +1 @@
 
 
1
+ eval
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16 ADDED
@@ -0,0 +1 @@
 
 
1
+ True
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval ADDED
@@ -0,0 +1 @@
 
 
1
+ False
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id ADDED
@@ -0,0 +1 @@
 
 
1
+ 1
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward ADDED
@@ -0,0 +1 @@
 
 
1
+ 0
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio ADDED
@@ -0,0 +1 @@
 
 
1
+ None
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio ADDED
@@ -0,0 +1 @@
 
 
1
+ None
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size ADDED
@@ -0,0 +1 @@
 
 
1
+ None
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed ADDED
@@ -0,0 +1 @@
 
 
1
+ None
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last ADDED
@@ -0,0 +1 @@
 
 
1
+ False
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers ADDED
@@ -0,0 +1 @@
 
 
1
+ 0
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers ADDED
@@ -0,0 +1 @@
 
 
1
+ False
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory ADDED
@@ -0,0 +1 @@
 
 
1
+ True
mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor ADDED
@@ -0,0 +1 @@
 
 
1
+ None