Asimok committed on
Commit
5126c18
·
1 Parent(s): f487920

Upload 10 files

Browse files
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.4878490357171921,
4
+ "train_runtime": 6234.3462,
5
+ "train_samples_per_second": 0.405,
6
+ "train_steps_per_second": 0.034
7
+ }
final/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: float16
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.5.0
final/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/data0/maqi/huggingface_models/option2-models/option2-race_ft_alpaca",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 64,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 128,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "k_proj",
18
+ "down_proj",
19
+ "o_proj",
20
+ "up_proj",
21
+ "v_proj",
22
+ "gate_proj",
23
+ "q_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
final/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95ac580582a8c312bc9322f60dec4156d7ae6b7abde810cf61bf835b6fe5366
3
+ size 1279424269
final/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38f7ee24d5e0d3b7bc1dd6673b48027ffa9a246f24451593591ed366f994c60c
3
+ size 4091
race_ft_alpaca_1_quality_2.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "output/option-2/QuALITY/race_ft_alpaca_1_quality_2",
3
+ "model_name_or_path": "/data0/maqi/huggingface_models/option2-models/option2-race_ft_alpaca",
4
+ "train_file": "/data0/maqi/KGLQA-data/datasets/QuALITY/Caption/quality_caption_and_rel_instruct/train.jsonl",
5
+ "num_train_epochs": 1,
6
+ "per_device_train_batch_size": 6,
7
+ "gradient_accumulation_steps": 2,
8
+ "learning_rate": 1e-4,
9
+ "max_seq_length": 2048,
10
+ "logging_steps": 10,
11
+ "save_steps": 100,
12
+ "save_total_limit": 1,
13
+ "lr_scheduler_type": "constant_with_warmup",
14
+ "warmup_ratio": 0.1,
15
+ "lora_rank": 128,
16
+ "lora_alpha": 64,
17
+ "lora_dropout": 0.05,
18
+
19
+ "gradient_checkpointing": true,
20
+ "disable_tqdm": false,
21
+ "optim": "paged_adamw_32bit",
22
+ "seed": 318,
23
+ "fp16": true,
24
+ "report_to": "tensorboard",
25
+ "dataloader_num_workers": 10,
26
+ "save_strategy": "steps",
27
+ "weight_decay": 0,
28
+ "max_grad_norm": 0.3,
29
+ "remove_unused_columns": false
30
+ }
runs/Dec30_17-59-51_u747/events.out.tfevents.1703930449.u747.874524.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99fd7919ea4ab5d608e6fec043cb7e8f1f56cc6fe40e5e2751193b448879718
3
+ size 9420
train.log ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/210 [00:00<?, ?it/s]
1
  0%| | 1/210 [00:33<1:58:19, 33.97s/it]
2
  1%| | 2/210 [01:03<1:48:04, 31.18s/it]
3
  1%|▏ | 3/210 [01:32<1:44:41, 30.34s/it]
4
  2%|▏ | 4/210 [02:01<1:42:54, 29.97s/it]
5
  2%|▏ | 5/210 [02:31<1:41:41, 29.76s/it]
6
  3%|▎ | 6/210 [03:01<1:41:11, 29.76s/it]
7
  3%|▎ | 7/210 [03:30<1:40:18, 29.65s/it]
8
  4%|▍ | 8/210 [04:00<1:39:55, 29.68s/it]
9
  4%|▍ | 9/210 [04:29<1:39:06, 29.58s/it]
10
  5%|▍ | 10/210 [04:58<1:38:23, 29.52s/it]
11
 
 
12
  5%|▍ | 10/210 [04:59<1:38:23, 29.52s/it]
13
  5%|▌ | 11/210 [05:28<1:38:06, 29.58s/it]
14
  6%|▌ | 12/210 [05:58<1:37:45, 29.62s/it]
15
  6%|▌ | 13/210 [06:28<1:37:21, 29.65s/it]
16
  7%|▋ | 14/210 [06:57<1:36:36, 29.57s/it]
17
  7%|▋ | 15/210 [07:27<1:36:17, 29.63s/it]
18
  8%|▊ | 16/210 [07:57<1:35:56, 29.67s/it]
19
  8%|▊ | 17/210 [08:26<1:35:31, 29.70s/it]
20
  9%|▊ | 18/210 [08:56<1:35:05, 29.71s/it]
21
  9%|▉ | 19/210 [09:27<1:35:15, 29.92s/it]
22
  10%|▉ | 20/210 [09:57<1:34:55, 29.97s/it]
23
 
 
24
  10%|▉ | 20/210 [09:57<1:34:55, 29.97s/it]
25
  10%|█ | 21/210 [10:26<1:33:54, 29.81s/it]
26
  10%|█ | 22/210 [10:56<1:33:39, 29.89s/it]
27
  11%|█ | 23/210 [11:26<1:32:43, 29.75s/it]
28
  11%|█▏ | 24/210 [11:55<1:31:56, 29.66s/it]
29
  12%|█▏ | 25/210 [12:24<1:31:14, 29.59s/it]
30
  12%|█▏ | 26/210 [12:54<1:30:36, 29.55s/it]
31
  13%|█▎ | 27/210 [13:24<1:30:16, 29.60s/it]
32
  13%|█▎ | 28/210 [13:53<1:29:29, 29.50s/it]
33
  14%|█▍ | 29/210 [14:23<1:29:11, 29.57s/it]
34
  14%|█▍ | 30/210 [14:53<1:29:08, 29.72s/it]
35
 
 
36
  14%|█▍ | 30/210 [14:53<1:29:08, 29.72s/it]
37
  15%|█▍ | 31/210 [15:22<1:28:25, 29.64s/it]
38
  15%|█▌ | 32/210 [15:52<1:28:16, 29.76s/it]
39
  16%|█▌ | 33/210 [16:22<1:27:45, 29.75s/it]
40
  16%|█▌ | 34/210 [16:51<1:26:58, 29.65s/it]
41
  17%|█▋ | 35/210 [17:21<1:26:17, 29.59s/it]
42
  17%|█▋ | 36/210 [17:50<1:25:39, 29.54s/it]
43
  18%|█▊ | 37/210 [18:20<1:25:38, 29.70s/it]
44
  18%|█▊ | 38/210 [18:50<1:24:54, 29.62s/it]
45
  19%|█▊ | 39/210 [19:19<1:24:15, 29.56s/it]
46
  19%|█▉ | 40/210 [19:49<1:24:10, 29.71s/it]
47
 
 
48
  19%|█▉ | 40/210 [19:49<1:24:10, 29.71s/it]
49
  20%|█▉ | 41/210 [20:19<1:23:43, 29.73s/it]
50
  20%|██ | 42/210 [20:48<1:23:00, 29.64s/it]
51
  20%|██ | 43/210 [21:18<1:22:36, 29.68s/it]
52
  21%|██ | 44/210 [21:48<1:22:11, 29.71s/it]
53
  21%|██▏ | 45/210 [22:18<1:22:00, 29.82s/it]
54
  22%|██▏ | 46/210 [22:48<1:21:27, 29.80s/it]
55
  22%|██▏ | 47/210 [23:18<1:20:56, 29.80s/it]
56
  23%|██▎ | 48/210 [23:47<1:20:09, 29.69s/it]
57
  23%|██▎ | 49/210 [24:16<1:19:28, 29.62s/it]
58
  24%|██▍ | 50/210 [24:46<1:19:05, 29.66s/it]
59
 
 
60
  24%|██▍ | 50/210 [24:46<1:19:05, 29.66s/it]
61
  24%|██▍ | 51/210 [25:16<1:18:25, 29.60s/it]
62
  25%|██▍ | 52/210 [25:45<1:17:48, 29.55s/it]
63
  25%|██▌ | 53/210 [26:15<1:17:28, 29.61s/it]
64
  26%|██▌ | 54/210 [26:44<1:16:50, 29.55s/it]
65
  26%|██▌ | 55/210 [27:14<1:16:45, 29.71s/it]
66
  27%|██▋ | 56/210 [27:44<1:16:18, 29.73s/it]
67
  27%|██▋ | 57/210 [28:14<1:15:50, 29.74s/it]
68
  28%|██▊ | 58/210 [28:44<1:15:22, 29.75s/it]
69
  28%|██▊ | 59/210 [29:13<1:14:53, 29.76s/it]
70
  29%|██▊ | 60/210 [29:43<1:14:09, 29.66s/it]
71
 
 
72
  29%|██▊ | 60/210 [29:43<1:14:09, 29.66s/it]
73
  29%|██▉ | 61/210 [30:12<1:13:31, 29.61s/it]
74
  30%|██▉ | 62/210 [30:42<1:12:55, 29.56s/it]
75
  30%|███ | 63/210 [31:12<1:12:35, 29.63s/it]
76
  30%|███ | 64/210 [31:41<1:11:56, 29.56s/it]
77
  31%|███ | 65/210 [32:10<1:11:23, 29.54s/it]
78
  31%|███▏ | 66/210 [32:40<1:11:03, 29.61s/it]
79
  32%|███▏ | 67/210 [33:10<1:10:27, 29.56s/it]
80
  32%|███▏ | 68/210 [33:39<1:09:52, 29.53s/it]
81
  33%|███▎ | 69/210 [34:09<1:09:19, 29.50s/it]
82
  33%|███▎ | 70/210 [34:38<1:08:48, 29.49s/it]
83
 
 
84
  33%|███▎ | 70/210 [34:38<1:08:48, 29.49s/it]
85
  34%|███▍ | 71/210 [35:07<1:08:18, 29.48s/it]
86
  34%|███▍ | 72/210 [35:37<1:07:47, 29.48s/it]
87
  35%|███▍ | 73/210 [36:06<1:07:16, 29.46s/it]
88
  35%|███▌ | 74/210 [36:36<1:07:00, 29.56s/it]
89
  36%|███▌ | 75/210 [37:06<1:06:26, 29.53s/it]
90
  36%|███▌ | 76/210 [37:35<1:06:05, 29.59s/it]
91
  37%|███▋ | 77/210 [38:05<1:05:41, 29.64s/it]
92
  37%|███▋ | 78/210 [38:35<1:05:03, 29.57s/it]
93
  38%|███▊ | 79/210 [39:04<1:04:29, 29.54s/it]
94
  38%|███▊ | 80/210 [39:33<1:03:55, 29.51s/it]
95
 
 
96
  38%|███▊ | 80/210 [39:33<1:03:55, 29.51s/it]
97
  39%|███▊ | 81/210 [40:03<1:03:34, 29.57s/it]
98
  39%|███▉ | 82/210 [40:33<1:03:12, 29.63s/it]
99
  40%|███▉ | 83/210 [41:03<1:02:47, 29.67s/it]
100
  40%|████ | 84/210 [41:32<1:02:21, 29.70s/it]
101
  40%|████ | 85/210 [42:02<1:01:42, 29.62s/it]
102
  41%|████ | 86/210 [42:31<1:01:05, 29.56s/it]
103
  41%|████▏ | 87/210 [43:01<1:00:32, 29.53s/it]
104
  42%|████▏ | 88/210 [43:30<59:59, 29.51s/it]
105
  42%|████▏ | 89/210 [44:00<59:38, 29.58s/it]
106
  43%|████▎ | 90/210 [44:29<59:04, 29.53s/it]
107
 
 
108
  43%|████▎ | 90/210 [44:29<59:04, 29.53s/it]
109
  43%|████▎ | 91/210 [44:59<58:43, 29.61s/it]
110
  44%|████▍ | 92/210 [45:29<58:18, 29.65s/it]
111
  44%|████▍ | 93/210 [45:58<57:41, 29.59s/it]
112
  45%|████▍ | 94/210 [46:28<57:29, 29.74s/it]
113
  45%|████▌ | 95/210 [46:58<57:00, 29.74s/it]
114
  46%|████▌ | 96/210 [47:28<56:19, 29.64s/it]
115
  46%|████▌ | 97/210 [47:57<55:41, 29.57s/it]
116
  47%|████▋ | 98/210 [48:27<55:18, 29.63s/it]
117
  47%|████▋ | 99/210 [48:56<54:42, 29.57s/it]
118
  48%|████▊ | 100/210 [49:26<54:18, 29.62s/it]
119
 
 
120
  48%|████▊ | 100/210 [49:26<54:18, 29.62s/it]
121
  48%|████▊ | 101/210 [49:59<55:56, 30.80s/it]
122
  49%|████▊ | 102/210 [50:29<54:42, 30.39s/it]
123
  49%|████▉ | 103/210 [50:59<53:50, 30.20s/it]
124
  50%|████▉ | 104/210 [51:28<52:57, 29.98s/it]
125
  50%|█████ | 105/210 [51:58<52:20, 29.91s/it]
126
  50%|█████ | 106/210 [52:27<51:35, 29.77s/it]
127
  51%|█████ | 107/210 [52:57<51:05, 29.77s/it]
128
  51%|█████▏ | 108/210 [53:27<50:35, 29.76s/it]
129
  52%|█████▏ | 109/210 [53:56<49:54, 29.65s/it]
130
  52%|█████▏ | 110/210 [54:26<49:19, 29.59s/it]
131
 
 
132
  52%|█████▏ | 110/210 [54:26<49:19, 29.59s/it]
133
  53%|█████▎ | 111/210 [54:55<48:54, 29.64s/it]
134
  53%|█████▎ | 112/210 [55:25<48:16, 29.56s/it]
135
  54%|█████▍ | 113/210 [55:54<47:44, 29.53s/it]
136
  54%|█████▍ | 114/210 [56:24<47:21, 29.59s/it]
137
  55%|█████▍ | 115/210 [56:53<46:46, 29.54s/it]
138
  55%|█████▌ | 116/210 [57:24<46:31, 29.70s/it]
139
  56%|█████▌ | 117/210 [57:53<46:03, 29.71s/it]
140
  56%|█████▌ | 118/210 [58:23<45:25, 29.63s/it]
141
  57%|█████▋ | 119/210 [58:52<44:59, 29.66s/it]
142
  57%|█████▋ | 120/210 [59:22<44:23, 29.59s/it]
143
 
 
144
  57%|█████▋ | 120/210 [59:22<44:23, 29.59s/it]
145
  58%|█████▊ | 121/210 [59:52<43:55, 29.61s/it]
146
  58%|█████▊ | 122/210 [1:00:21<43:29, 29.65s/it]
147
  59%|█████▊ | 123/210 [1:00:51<42:53, 29.58s/it]
148
  59%|█████▉ | 124/210 [1:01:20<42:19, 29.53s/it]
149
  60%|█████▉ | 125/210 [1:01:50<41:56, 29.60s/it]
150
  60%|██████ | 126/210 [1:02:19<41:21, 29.54s/it]
151
  60%|██████ | 127/210 [1:02:49<40:48, 29.50s/it]
152
  61%|██████ | 128/210 [1:03:18<40:19, 29.50s/it]
153
  61%|██████▏ | 129/210 [1:03:48<39:55, 29.58s/it]
154
  62%|██████▏ | 130/210 [1:04:17<39:22, 29.53s/it]
155
 
 
156
  62%|██████▏ | 130/210 [1:04:17<39:22, 29.53s/it]
157
  62%|██████▏ | 131/210 [1:04:47<38:49, 29.49s/it]
158
  63%|██████▎ | 132/210 [1:05:16<38:18, 29.47s/it]
159
  63%|██████▎ | 133/210 [1:05:46<37:47, 29.44s/it]
160
  64%|██████▍ | 134/210 [1:06:15<37:16, 29.43s/it]
161
  64%|██████▍ | 135/210 [1:06:45<36:53, 29.52s/it]
162
  65%|██████▍ | 136/210 [1:07:14<36:21, 29.49s/it]
163
  65%|██████▌ | 137/210 [1:07:44<35:51, 29.47s/it]
164
  66%|██████▌ | 138/210 [1:08:13<35:27, 29.55s/it]
165
  66%|██████▌ | 139/210 [1:08:43<35:02, 29.61s/it]
166
  67%|██████▋ | 140/210 [1:09:13<34:35, 29.65s/it]
167
 
 
168
  67%|██████▋ | 140/210 [1:09:13<34:35, 29.65s/it]
169
  67%|██████▋ | 141/210 [1:09:42<34:07, 29.67s/it]
170
  68%|██████▊ | 142/210 [1:10:12<33:39, 29.69s/it]
171
  68%|██████▊ | 143/210 [1:10:42<33:16, 29.80s/it]
172
  69%|██████▊ | 144/210 [1:11:12<32:39, 29.69s/it]
173
  69%|██████▉ | 145/210 [1:11:41<32:10, 29.70s/it]
174
  70%|██████▉ | 146/210 [1:12:11<31:35, 29.62s/it]
175
  70%|███████ | 147/210 [1:12:40<31:02, 29.56s/it]
176
  70%|███████ | 148/210 [1:13:10<30:30, 29.52s/it]
177
  71%|███████ | 149/210 [1:13:39<29:58, 29.49s/it]
178
  71%|███████▏ | 150/210 [1:14:09<29:28, 29.47s/it]
179
 
 
180
  71%|███████▏ | 150/210 [1:14:09<29:28, 29.47s/it]
181
  72%|███████▏ | 151/210 [1:14:39<29:08, 29.64s/it]
182
  72%|███████▏ | 152/210 [1:15:08<28:35, 29.58s/it]
183
  73%|███████▎ | 153/210 [1:15:38<28:14, 29.73s/it]
184
  73%|███████▎ | 154/210 [1:16:07<27:39, 29.63s/it]
185
  74%|███████▍ | 155/210 [1:16:37<27:10, 29.65s/it]
186
  74%|███████▍ | 156/210 [1:17:07<26:37, 29.58s/it]
187
  75%|███████▍ | 157/210 [1:17:36<26:04, 29.52s/it]
188
  75%|███████▌ | 158/210 [1:18:06<25:36, 29.55s/it]
189
  76%|███████▌ | 159/210 [1:18:35<25:09, 29.60s/it]
190
  76%|███████▌ | 160/210 [1:19:05<24:42, 29.65s/it]
191
 
 
192
  76%|███████▌ | 160/210 [1:19:05<24:42, 29.65s/it]
193
  77%|███████▋ | 161/210 [1:19:35<24:14, 29.68s/it]
194
  77%|███████▋ | 162/210 [1:20:04<23:41, 29.60s/it]
195
  78%|███████▊ | 163/210 [1:20:34<23:12, 29.64s/it]
196
  78%|███████▊ | 164/210 [1:21:04<22:44, 29.66s/it]
197
  79%|███████▊ | 165/210 [1:21:33<22:15, 29.67s/it]
198
  79%|███████▉ | 166/210 [1:22:03<21:40, 29.56s/it]
199
  80%|███████▉ | 167/210 [1:22:32<21:08, 29.51s/it]
200
  80%|████████ | 168/210 [1:23:01<20:38, 29.48s/it]
201
  80%|████████ | 169/210 [1:23:31<20:11, 29.56s/it]
202
  81%|████████ | 170/210 [1:24:01<19:40, 29.51s/it]
203
 
 
204
  81%|████████ | 170/210 [1:24:01<19:40, 29.51s/it]
205
  81%|████████▏ | 171/210 [1:24:30<19:13, 29.57s/it]
206
  82%|████████▏ | 172/210 [1:25:00<18:42, 29.53s/it]
207
  82%|████████▏ | 173/210 [1:25:30<18:18, 29.68s/it]
208
  83%|████████▎ | 174/210 [1:25:59<17:45, 29.61s/it]
209
  83%|████████▎ | 175/210 [1:26:29<17:14, 29.54s/it]
210
  84%|████████▍ | 176/210 [1:26:58<16:43, 29.51s/it]
211
  84%|████████▍ | 177/210 [1:27:28<16:15, 29.57s/it]
212
  85%|████████▍ | 178/210 [1:27:57<15:44, 29.52s/it]
213
  85%|████████▌ | 179/210 [1:28:27<15:13, 29.48s/it]
214
  86%|████████▌ | 180/210 [1:28:56<14:43, 29.44s/it]
215
 
 
216
  86%|████████▌ | 180/210 [1:28:56<14:43, 29.44s/it]
217
  86%|████████▌ | 181/210 [1:29:26<14:16, 29.52s/it]
218
  87%|████████▋ | 182/210 [1:29:55<13:45, 29.48s/it]
219
  87%|████████▋ | 183/210 [1:30:25<13:17, 29.55s/it]
220
  88%|████████▊ | 184/210 [1:30:54<12:49, 29.60s/it]
221
  88%|████████▊ | 185/210 [1:31:24<12:18, 29.54s/it]
222
  89%|████████▊ | 186/210 [1:31:53<11:47, 29.50s/it]
223
  89%|████████▉ | 187/210 [1:32:23<11:17, 29.47s/it]
224
  90%|████████▉ | 188/210 [1:32:52<10:49, 29.54s/it]
225
  90%|█████████ | 189/210 [1:33:22<10:21, 29.60s/it]
226
  90%|█████████ | 190/210 [1:33:52<09:52, 29.64s/it]
227
 
 
228
  90%|█████████ | 190/210 [1:33:52<09:52, 29.64s/it]
229
  91%|█████████ | 191/210 [1:34:22<09:25, 29.76s/it]
230
  91%|█████████▏| 192/210 [1:34:52<08:55, 29.75s/it]
231
  92%|█████████▏| 193/210 [1:35:21<08:25, 29.75s/it]
232
  92%|█████████▏| 194/210 [1:35:51<07:55, 29.74s/it]
233
  93%|█████████▎| 195/210 [1:36:20<07:24, 29.65s/it]
234
  93%|█████████▎| 196/210 [1:36:50<06:55, 29.67s/it]
235
  94%|█████████▍| 197/210 [1:37:20<06:27, 29.79s/it]
236
  94%|█████████▍| 198/210 [1:37:50<05:56, 29.71s/it]
237
  95%|█████████▍| 199/210 [1:38:19<05:25, 29.61s/it]
238
  95%|█████████▌| 200/210 [1:38:49<04:55, 29.55s/it]
239
 
 
240
  95%|█████████▌| 200/210 [1:38:49<04:55, 29.55s/it]
241
  96%|█████████▌| 201/210 [1:39:22<04:36, 30.70s/it]
242
  96%|█████████▌| 202/210 [1:39:51<04:02, 30.31s/it]
243
  97%|█████████▋| 203/210 [1:40:21<03:30, 30.05s/it]
244
  97%|█████████▋| 204/210 [1:40:50<02:59, 29.86s/it]
245
  98%|█████████▊| 205/210 [1:41:20<02:28, 29.73s/it]
246
  98%|█████████▊| 206/210 [1:41:49<01:58, 29.64s/it]
247
  99%|█████████▊| 207/210 [1:42:19<01:28, 29.58s/it]
248
  99%|█████████▉| 208/210 [1:42:48<00:59, 29.53s/it]
249
 
 
250
 
 
 
 
 
 
 
 
 
1
+ model training desc: initialize model training...
2
+ 2023-12-30 17:59:51.341 | INFO | __main__:init_components:108 - Initializing components...
3
+
4
+ You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
5
+ 2023-12-30 18:00:03.415 | INFO | __main__:init_components:155 -
6
+
7
+ 2023-12-30 18:00:03.415 | INFO | __main__:init_components:156 - ********************
8
+ 2023-12-30 18:00:03.415 | INFO | __main__:init_components:157 - using llama2 model
9
+ 2023-12-30 18:00:03.415 | INFO | __main__:init_components:158 - ********************
10
+ 2023-12-30 18:00:03.415 | INFO | __main__:init_components:159 -
11
+
12
+ memory footprint of model: 4.024436950683594 GB
13
+ trainable params: 319,815,680 || all params: 7,058,231,296 || trainable%: 4.531102291607305
14
+ 2023-12-30 18:00:48.703 | INFO | component.dataset:__init__:14 - Loading data: /data0/maqi/KGLQA-data/datasets/QuALITY/Caption/quality_caption_and_rel_instruct/train.jsonl
15
+ 2023-12-30 18:00:48.807 | INFO | component.dataset:__init__:19 - there are 2523 data in dataset
16
+ 2023-12-30 18:00:49.225 | INFO | __main__:main:231 - *** starting training ***
17
+
18
  0%| | 0/210 [00:00<?, ?it/s]
19
  0%| | 1/210 [00:33<1:58:19, 33.97s/it]
20
  1%| | 2/210 [01:03<1:48:04, 31.18s/it]
21
  1%|▏ | 3/210 [01:32<1:44:41, 30.34s/it]
22
  2%|▏ | 4/210 [02:01<1:42:54, 29.97s/it]
23
  2%|▏ | 5/210 [02:31<1:41:41, 29.76s/it]
24
  3%|▎ | 6/210 [03:01<1:41:11, 29.76s/it]
25
  3%|▎ | 7/210 [03:30<1:40:18, 29.65s/it]
26
  4%|▍ | 8/210 [04:00<1:39:55, 29.68s/it]
27
  4%|▍ | 9/210 [04:29<1:39:06, 29.58s/it]
28
  5%|▍ | 10/210 [04:58<1:38:23, 29.52s/it]
29
 
30
+
31
  5%|▍ | 10/210 [04:59<1:38:23, 29.52s/it]
32
  5%|▌ | 11/210 [05:28<1:38:06, 29.58s/it]
33
  6%|▌ | 12/210 [05:58<1:37:45, 29.62s/it]
34
  6%|▌ | 13/210 [06:28<1:37:21, 29.65s/it]
35
  7%|▋ | 14/210 [06:57<1:36:36, 29.57s/it]
36
  7%|▋ | 15/210 [07:27<1:36:17, 29.63s/it]
37
  8%|▊ | 16/210 [07:57<1:35:56, 29.67s/it]
38
  8%|▊ | 17/210 [08:26<1:35:31, 29.70s/it]
39
  9%|▊ | 18/210 [08:56<1:35:05, 29.71s/it]
40
  9%|▉ | 19/210 [09:27<1:35:15, 29.92s/it]
41
  10%|▉ | 20/210 [09:57<1:34:55, 29.97s/it]
42
 
43
+
44
  10%|▉ | 20/210 [09:57<1:34:55, 29.97s/it]
45
  10%|█ | 21/210 [10:26<1:33:54, 29.81s/it]
46
  10%|█ | 22/210 [10:56<1:33:39, 29.89s/it]
47
  11%|█ | 23/210 [11:26<1:32:43, 29.75s/it]
48
  11%|█▏ | 24/210 [11:55<1:31:56, 29.66s/it]
49
  12%|█▏ | 25/210 [12:24<1:31:14, 29.59s/it]
50
  12%|█▏ | 26/210 [12:54<1:30:36, 29.55s/it]
51
  13%|█▎ | 27/210 [13:24<1:30:16, 29.60s/it]
52
  13%|█▎ | 28/210 [13:53<1:29:29, 29.50s/it]
53
  14%|█▍ | 29/210 [14:23<1:29:11, 29.57s/it]
54
  14%|█▍ | 30/210 [14:53<1:29:08, 29.72s/it]
55
 
56
+
57
  14%|█▍ | 30/210 [14:53<1:29:08, 29.72s/it]
58
  15%|█▍ | 31/210 [15:22<1:28:25, 29.64s/it]
59
  15%|█▌ | 32/210 [15:52<1:28:16, 29.76s/it]
60
  16%|█▌ | 33/210 [16:22<1:27:45, 29.75s/it]
61
  16%|█▌ | 34/210 [16:51<1:26:58, 29.65s/it]
62
  17%|█▋ | 35/210 [17:21<1:26:17, 29.59s/it]
63
  17%|█▋ | 36/210 [17:50<1:25:39, 29.54s/it]
64
  18%|█▊ | 37/210 [18:20<1:25:38, 29.70s/it]
65
  18%|█▊ | 38/210 [18:50<1:24:54, 29.62s/it]
66
  19%|█▊ | 39/210 [19:19<1:24:15, 29.56s/it]
67
  19%|█▉ | 40/210 [19:49<1:24:10, 29.71s/it]
68
 
69
+
70
  19%|█▉ | 40/210 [19:49<1:24:10, 29.71s/it]
71
  20%|█▉ | 41/210 [20:19<1:23:43, 29.73s/it]
72
  20%|██ | 42/210 [20:48<1:23:00, 29.64s/it]
73
  20%|██ | 43/210 [21:18<1:22:36, 29.68s/it]
74
  21%|██ | 44/210 [21:48<1:22:11, 29.71s/it]
75
  21%|██▏ | 45/210 [22:18<1:22:00, 29.82s/it]
76
  22%|██▏ | 46/210 [22:48<1:21:27, 29.80s/it]
77
  22%|██▏ | 47/210 [23:18<1:20:56, 29.80s/it]
78
  23%|██▎ | 48/210 [23:47<1:20:09, 29.69s/it]
79
  23%|██▎ | 49/210 [24:16<1:19:28, 29.62s/it]
80
  24%|██▍ | 50/210 [24:46<1:19:05, 29.66s/it]
81
 
82
+
83
  24%|██▍ | 50/210 [24:46<1:19:05, 29.66s/it]
84
  24%|██▍ | 51/210 [25:16<1:18:25, 29.60s/it]
85
  25%|██▍ | 52/210 [25:45<1:17:48, 29.55s/it]
86
  25%|██▌ | 53/210 [26:15<1:17:28, 29.61s/it]
87
  26%|██▌ | 54/210 [26:44<1:16:50, 29.55s/it]
88
  26%|██▌ | 55/210 [27:14<1:16:45, 29.71s/it]
89
  27%|██▋ | 56/210 [27:44<1:16:18, 29.73s/it]
90
  27%|██▋ | 57/210 [28:14<1:15:50, 29.74s/it]
91
  28%|██▊ | 58/210 [28:44<1:15:22, 29.75s/it]
92
  28%|██▊ | 59/210 [29:13<1:14:53, 29.76s/it]
93
  29%|██▊ | 60/210 [29:43<1:14:09, 29.66s/it]
94
 
95
+
96
  29%|██▊ | 60/210 [29:43<1:14:09, 29.66s/it]
97
  29%|██▉ | 61/210 [30:12<1:13:31, 29.61s/it]
98
  30%|██▉ | 62/210 [30:42<1:12:55, 29.56s/it]
99
  30%|███ | 63/210 [31:12<1:12:35, 29.63s/it]
100
  30%|███ | 64/210 [31:41<1:11:56, 29.56s/it]
101
  31%|███ | 65/210 [32:10<1:11:23, 29.54s/it]
102
  31%|███▏ | 66/210 [32:40<1:11:03, 29.61s/it]
103
  32%|███▏ | 67/210 [33:10<1:10:27, 29.56s/it]
104
  32%|███▏ | 68/210 [33:39<1:09:52, 29.53s/it]
105
  33%|███▎ | 69/210 [34:09<1:09:19, 29.50s/it]
106
  33%|███▎ | 70/210 [34:38<1:08:48, 29.49s/it]
107
 
108
+
109
  33%|███▎ | 70/210 [34:38<1:08:48, 29.49s/it]
110
  34%|███▍ | 71/210 [35:07<1:08:18, 29.48s/it]
111
  34%|███▍ | 72/210 [35:37<1:07:47, 29.48s/it]
112
  35%|███▍ | 73/210 [36:06<1:07:16, 29.46s/it]
113
  35%|███▌ | 74/210 [36:36<1:07:00, 29.56s/it]
114
  36%|███▌ | 75/210 [37:06<1:06:26, 29.53s/it]
115
  36%|███▌ | 76/210 [37:35<1:06:05, 29.59s/it]
116
  37%|███▋ | 77/210 [38:05<1:05:41, 29.64s/it]
117
  37%|███▋ | 78/210 [38:35<1:05:03, 29.57s/it]
118
  38%|███▊ | 79/210 [39:04<1:04:29, 29.54s/it]
119
  38%|███▊ | 80/210 [39:33<1:03:55, 29.51s/it]
120
 
121
+
122
  38%|███▊ | 80/210 [39:33<1:03:55, 29.51s/it]
123
  39%|███▊ | 81/210 [40:03<1:03:34, 29.57s/it]
124
  39%|███▉ | 82/210 [40:33<1:03:12, 29.63s/it]
125
  40%|███▉ | 83/210 [41:03<1:02:47, 29.67s/it]
126
  40%|████ | 84/210 [41:32<1:02:21, 29.70s/it]
127
  40%|████ | 85/210 [42:02<1:01:42, 29.62s/it]
128
  41%|████ | 86/210 [42:31<1:01:05, 29.56s/it]
129
  41%|████▏ | 87/210 [43:01<1:00:32, 29.53s/it]
130
  42%|████▏ | 88/210 [43:30<59:59, 29.51s/it]
131
  42%|████▏ | 89/210 [44:00<59:38, 29.58s/it]
132
  43%|████▎ | 90/210 [44:29<59:04, 29.53s/it]
133
 
134
+
135
  43%|████▎ | 90/210 [44:29<59:04, 29.53s/it]
136
  43%|████▎ | 91/210 [44:59<58:43, 29.61s/it]
137
  44%|████▍ | 92/210 [45:29<58:18, 29.65s/it]
138
  44%|████▍ | 93/210 [45:58<57:41, 29.59s/it]
139
  45%|████▍ | 94/210 [46:28<57:29, 29.74s/it]
140
  45%|████▌ | 95/210 [46:58<57:00, 29.74s/it]
141
  46%|████▌ | 96/210 [47:28<56:19, 29.64s/it]
142
  46%|████▌ | 97/210 [47:57<55:41, 29.57s/it]
143
  47%|████▋ | 98/210 [48:27<55:18, 29.63s/it]
144
  47%|████▋ | 99/210 [48:56<54:42, 29.57s/it]
145
  48%|████▊ | 100/210 [49:26<54:18, 29.62s/it]
146
 
147
+
148
  48%|████▊ | 100/210 [49:26<54:18, 29.62s/it]
149
  48%|████▊ | 101/210 [49:59<55:56, 30.80s/it]
150
  49%|████▊ | 102/210 [50:29<54:42, 30.39s/it]
151
  49%|████▉ | 103/210 [50:59<53:50, 30.20s/it]
152
  50%|████▉ | 104/210 [51:28<52:57, 29.98s/it]
153
  50%|█████ | 105/210 [51:58<52:20, 29.91s/it]
154
  50%|█████ | 106/210 [52:27<51:35, 29.77s/it]
155
  51%|█████ | 107/210 [52:57<51:05, 29.77s/it]
156
  51%|█████▏ | 108/210 [53:27<50:35, 29.76s/it]
157
  52%|█████▏ | 109/210 [53:56<49:54, 29.65s/it]
158
  52%|█████▏ | 110/210 [54:26<49:19, 29.59s/it]
159
 
160
+
161
  52%|█████▏ | 110/210 [54:26<49:19, 29.59s/it]
162
  53%|█████▎ | 111/210 [54:55<48:54, 29.64s/it]
163
  53%|█████▎ | 112/210 [55:25<48:16, 29.56s/it]
164
  54%|█████▍ | 113/210 [55:54<47:44, 29.53s/it]
165
  54%|█████▍ | 114/210 [56:24<47:21, 29.59s/it]
166
  55%|█████▍ | 115/210 [56:53<46:46, 29.54s/it]
167
  55%|█████▌ | 116/210 [57:24<46:31, 29.70s/it]
168
  56%|█████▌ | 117/210 [57:53<46:03, 29.71s/it]
169
  56%|█████▌ | 118/210 [58:23<45:25, 29.63s/it]
170
  57%|█████▋ | 119/210 [58:52<44:59, 29.66s/it]
171
  57%|█████▋ | 120/210 [59:22<44:23, 29.59s/it]
172
 
173
+
174
  57%|█████▋ | 120/210 [59:22<44:23, 29.59s/it]
175
  58%|█████▊ | 121/210 [59:52<43:55, 29.61s/it]
176
  58%|█████▊ | 122/210 [1:00:21<43:29, 29.65s/it]
177
  59%|█████▊ | 123/210 [1:00:51<42:53, 29.58s/it]
178
  59%|█████▉ | 124/210 [1:01:20<42:19, 29.53s/it]
179
  60%|█████▉ | 125/210 [1:01:50<41:56, 29.60s/it]
180
  60%|██████ | 126/210 [1:02:19<41:21, 29.54s/it]
181
  60%|██████ | 127/210 [1:02:49<40:48, 29.50s/it]
182
  61%|██████ | 128/210 [1:03:18<40:19, 29.50s/it]
183
  61%|██████▏ | 129/210 [1:03:48<39:55, 29.58s/it]
184
  62%|██████▏ | 130/210 [1:04:17<39:22, 29.53s/it]
185
 
186
+
187
  62%|██████▏ | 130/210 [1:04:17<39:22, 29.53s/it]
188
  62%|██████▏ | 131/210 [1:04:47<38:49, 29.49s/it]
189
  63%|██████▎ | 132/210 [1:05:16<38:18, 29.47s/it]
190
  63%|██████▎ | 133/210 [1:05:46<37:47, 29.44s/it]
191
  64%|██████▍ | 134/210 [1:06:15<37:16, 29.43s/it]
192
  64%|██████▍ | 135/210 [1:06:45<36:53, 29.52s/it]
193
  65%|██████▍ | 136/210 [1:07:14<36:21, 29.49s/it]
194
  65%|██████▌ | 137/210 [1:07:44<35:51, 29.47s/it]
195
  66%|██████▌ | 138/210 [1:08:13<35:27, 29.55s/it]
196
  66%|██████▌ | 139/210 [1:08:43<35:02, 29.61s/it]
197
  67%|██████▋ | 140/210 [1:09:13<34:35, 29.65s/it]
198
 
199
+
200
  67%|██████▋ | 140/210 [1:09:13<34:35, 29.65s/it]
201
  67%|██████▋ | 141/210 [1:09:42<34:07, 29.67s/it]
202
  68%|██████▊ | 142/210 [1:10:12<33:39, 29.69s/it]
203
  68%|██████▊ | 143/210 [1:10:42<33:16, 29.80s/it]
204
  69%|██████▊ | 144/210 [1:11:12<32:39, 29.69s/it]
205
  69%|██████▉ | 145/210 [1:11:41<32:10, 29.70s/it]
206
  70%|██████▉ | 146/210 [1:12:11<31:35, 29.62s/it]
207
  70%|███████ | 147/210 [1:12:40<31:02, 29.56s/it]
208
  70%|███████ | 148/210 [1:13:10<30:30, 29.52s/it]
209
  71%|███████ | 149/210 [1:13:39<29:58, 29.49s/it]
210
  71%|███████▏ | 150/210 [1:14:09<29:28, 29.47s/it]
211
 
212
+
213
  71%|███████▏ | 150/210 [1:14:09<29:28, 29.47s/it]
214
  72%|███████▏ | 151/210 [1:14:39<29:08, 29.64s/it]
215
  72%|███████▏ | 152/210 [1:15:08<28:35, 29.58s/it]
216
  73%|███████▎ | 153/210 [1:15:38<28:14, 29.73s/it]
217
  73%|███████▎ | 154/210 [1:16:07<27:39, 29.63s/it]
218
  74%|███████▍ | 155/210 [1:16:37<27:10, 29.65s/it]
219
  74%|███████▍ | 156/210 [1:17:07<26:37, 29.58s/it]
220
  75%|███████▍ | 157/210 [1:17:36<26:04, 29.52s/it]
221
  75%|███████▌ | 158/210 [1:18:06<25:36, 29.55s/it]
222
  76%|███████▌ | 159/210 [1:18:35<25:09, 29.60s/it]
223
  76%|███████▌ | 160/210 [1:19:05<24:42, 29.65s/it]
224
 
225
+
226
  76%|███████▌ | 160/210 [1:19:05<24:42, 29.65s/it]
227
  77%|███████▋ | 161/210 [1:19:35<24:14, 29.68s/it]
228
  77%|███████▋ | 162/210 [1:20:04<23:41, 29.60s/it]
229
  78%|███████▊ | 163/210 [1:20:34<23:12, 29.64s/it]
230
  78%|███████▊ | 164/210 [1:21:04<22:44, 29.66s/it]
231
  79%|███████▊ | 165/210 [1:21:33<22:15, 29.67s/it]
232
  79%|███████▉ | 166/210 [1:22:03<21:40, 29.56s/it]
233
  80%|███████▉ | 167/210 [1:22:32<21:08, 29.51s/it]
234
  80%|████████ | 168/210 [1:23:01<20:38, 29.48s/it]
235
  80%|████████ | 169/210 [1:23:31<20:11, 29.56s/it]
236
  81%|████████ | 170/210 [1:24:01<19:40, 29.51s/it]
237
 
238
+
239
  81%|████████ | 170/210 [1:24:01<19:40, 29.51s/it]
240
  81%|████████▏ | 171/210 [1:24:30<19:13, 29.57s/it]
241
  82%|████████▏ | 172/210 [1:25:00<18:42, 29.53s/it]
242
  82%|████████▏ | 173/210 [1:25:30<18:18, 29.68s/it]
243
  83%|████████▎ | 174/210 [1:25:59<17:45, 29.61s/it]
244
  83%|████████▎ | 175/210 [1:26:29<17:14, 29.54s/it]
245
  84%|████████▍ | 176/210 [1:26:58<16:43, 29.51s/it]
246
  84%|████████▍ | 177/210 [1:27:28<16:15, 29.57s/it]
247
  85%|████████▍ | 178/210 [1:27:57<15:44, 29.52s/it]
248
  85%|████████▌ | 179/210 [1:28:27<15:13, 29.48s/it]
249
  86%|████████▌ | 180/210 [1:28:56<14:43, 29.44s/it]
250
 
251
+
252
  86%|████████▌ | 180/210 [1:28:56<14:43, 29.44s/it]
253
  86%|████████▌ | 181/210 [1:29:26<14:16, 29.52s/it]
254
  87%|████████▋ | 182/210 [1:29:55<13:45, 29.48s/it]
255
  87%|████████▋ | 183/210 [1:30:25<13:17, 29.55s/it]
256
  88%|████████▊ | 184/210 [1:30:54<12:49, 29.60s/it]
257
  88%|████████▊ | 185/210 [1:31:24<12:18, 29.54s/it]
258
  89%|████████▊ | 186/210 [1:31:53<11:47, 29.50s/it]
259
  89%|████████▉ | 187/210 [1:32:23<11:17, 29.47s/it]
260
  90%|████████▉ | 188/210 [1:32:52<10:49, 29.54s/it]
261
  90%|█████████ | 189/210 [1:33:22<10:21, 29.60s/it]
262
  90%|█████████ | 190/210 [1:33:52<09:52, 29.64s/it]
263
 
264
+
265
  90%|█████████ | 190/210 [1:33:52<09:52, 29.64s/it]
266
  91%|█████████ | 191/210 [1:34:22<09:25, 29.76s/it]
267
  91%|█████████▏| 192/210 [1:34:52<08:55, 29.75s/it]
268
  92%|█████████▏| 193/210 [1:35:21<08:25, 29.75s/it]
269
  92%|█████████▏| 194/210 [1:35:51<07:55, 29.74s/it]
270
  93%|█████████▎| 195/210 [1:36:20<07:24, 29.65s/it]
271
  93%|█████████▎| 196/210 [1:36:50<06:55, 29.67s/it]
272
  94%|█████████▍| 197/210 [1:37:20<06:27, 29.79s/it]
273
  94%|█████████▍| 198/210 [1:37:50<05:56, 29.71s/it]
274
  95%|█████████▍| 199/210 [1:38:19<05:25, 29.61s/it]
275
  95%|█████████▌| 200/210 [1:38:49<04:55, 29.55s/it]
276
 
277
+
278
  95%|█████████▌| 200/210 [1:38:49<04:55, 29.55s/it]
279
  96%|█████████▌| 201/210 [1:39:22<04:36, 30.70s/it]
280
  96%|█████████▌| 202/210 [1:39:51<04:02, 30.31s/it]
281
  97%|█████████▋| 203/210 [1:40:21<03:30, 30.05s/it]
282
  97%|█████████▋| 204/210 [1:40:50<02:59, 29.86s/it]
283
  98%|█████████▊| 205/210 [1:41:20<02:28, 29.73s/it]
284
  98%|█████████▊| 206/210 [1:41:49<01:58, 29.64s/it]
285
  99%|█████████▊| 207/210 [1:42:19<01:28, 29.58s/it]
286
  99%|█████████▉| 208/210 [1:42:48<00:59, 29.53s/it]
287
 
288
+
289
 
290
+
291
+ ***** train metrics *****
292
+ epoch = 1.0
293
+ train_loss = 0.4878
294
+ train_runtime = 1:43:54.34
295
+ train_samples_per_second = 0.405
296
+ train_steps_per_second = 0.034
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.4878490357171921,
4
+ "train_runtime": 6234.3462,
5
+ "train_samples_per_second": 0.405,
6
+ "train_steps_per_second": 0.034
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.997624703087886,
5
+ "eval_steps": 500,
6
+ "global_step": 210,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05,
13
+ "global_step": 10,
14
+ "learning_rate": 4.761904761904762e-05,
15
+ "loss": 0.5879,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.1,
20
+ "global_step": 20,
21
+ "learning_rate": 9.523809523809524e-05,
22
+ "loss": 0.506,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.14,
27
+ "global_step": 30,
28
+ "learning_rate": 0.0001,
29
+ "loss": 0.5252,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.19,
34
+ "global_step": 40,
35
+ "learning_rate": 0.0001,
36
+ "loss": 0.5572,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.24,
41
+ "global_step": 50,
42
+ "learning_rate": 0.0001,
43
+ "loss": 0.4937,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.29,
48
+ "global_step": 60,
49
+ "learning_rate": 0.0001,
50
+ "loss": 0.4925,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.33,
55
+ "global_step": 70,
56
+ "learning_rate": 0.0001,
57
+ "loss": 0.4309,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.38,
62
+ "global_step": 80,
63
+ "learning_rate": 0.0001,
64
+ "loss": 0.4831,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.43,
69
+ "global_step": 90,
70
+ "learning_rate": 0.0001,
71
+ "loss": 0.4896,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.48,
76
+ "global_step": 100,
77
+ "learning_rate": 0.0001,
78
+ "loss": 0.4257,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.52,
83
+ "global_step": 110,
84
+ "learning_rate": 0.0001,
85
+ "loss": 0.5,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.57,
90
+ "global_step": 120,
91
+ "learning_rate": 0.0001,
92
+ "loss": 0.4954,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.62,
97
+ "global_step": 130,
98
+ "learning_rate": 0.0001,
99
+ "loss": 0.4691,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.67,
104
+ "global_step": 140,
105
+ "learning_rate": 0.0001,
106
+ "loss": 0.4373,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.71,
111
+ "global_step": 150,
112
+ "learning_rate": 0.0001,
113
+ "loss": 0.526,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.76,
118
+ "global_step": 160,
119
+ "learning_rate": 0.0001,
120
+ "loss": 0.4297,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.81,
125
+ "global_step": 170,
126
+ "learning_rate": 0.0001,
127
+ "loss": 0.4708,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.86,
132
+ "global_step": 180,
133
+ "learning_rate": 0.0001,
134
+ "loss": 0.4872,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.9,
139
+ "global_step": 190,
140
+ "learning_rate": 0.0001,
141
+ "loss": 0.4888,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.95,
146
+ "global_step": 200,
147
+ "learning_rate": 0.0001,
148
+ "loss": 0.4754,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 1.0,
153
+ "global_step": 210,
154
+ "learning_rate": 0.0001,
155
+ "loss": 0.4733,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 1.0,
160
+ "step": 210,
161
+ "total_flos": 1.1368810483705446e+17,
162
+ "train_loss": 0.4878490357171921,
163
+ "train_runtime": 6234.3462,
164
+ "train_samples_per_second": 0.405,
165
+ "train_steps_per_second": 0.034
166
+ }
167
+ ],
168
+ "logging_steps": 10,
169
+ "max_steps": 210,
170
+ "num_train_epochs": 1,
171
+ "save_steps": 100,
172
+ "total_flos": 1.1368810483705446e+17,
173
+ "trial_name": null,
174
+ "trial_params": null
175
+ }