Training in progress, step 50
Browse files- .ipynb_checkpoints/llama3_lora_sft-checkpoint.yaml +46 -0
- adapter_config.json +5 -5
- adapter_model.safetensors +1 -1
- llama3_lora_sft.yaml +5 -5
- tokenizer_config.json +1 -1
- trainer_log.jsonl +6 -68
- training_args.bin +1 -1
.ipynb_checkpoints/llama3_lora_sft-checkpoint.yaml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### model
|
| 2 |
+
model_name_or_path: google/gemma-2-9b-it
|
| 3 |
+
trust_remote_code: true
|
| 4 |
+
|
| 5 |
+
### method
|
| 6 |
+
stage: sft
|
| 7 |
+
do_train: true
|
| 8 |
+
finetuning_type: lora
|
| 9 |
+
lora_target: all
|
| 10 |
+
|
| 11 |
+
### dataset
|
| 12 |
+
dataset: bct_non_cot_sft_1000
|
| 13 |
+
dataset_dir: data_private
|
| 14 |
+
template: gemma
|
| 15 |
+
cutoff_len: 1024
|
| 16 |
+
# max_samples: 1000
|
| 17 |
+
overwrite_cache: true
|
| 18 |
+
preprocessing_num_workers: 16
|
| 19 |
+
|
| 20 |
+
### output
|
| 21 |
+
output_dir: saves/gemma-2-9b-it/sft-1000/train
|
| 22 |
+
logging_steps: 10
|
| 23 |
+
save_steps: 50
|
| 24 |
+
plot_loss: true
|
| 25 |
+
overwrite_output_dir: true
|
| 26 |
+
save_total_limit: 3
|
| 27 |
+
push_to_hub: true
|
| 28 |
+
hub_model_id: chchen/gemma-2-9b-it-sft-1000
|
| 29 |
+
load_best_model_at_end: true
|
| 30 |
+
|
| 31 |
+
### train
|
| 32 |
+
per_device_train_batch_size: 4
|
| 33 |
+
gradient_accumulation_steps: 8
|
| 34 |
+
learning_rate: 1.0e-4
|
| 35 |
+
num_train_epochs: 10.0
|
| 36 |
+
lr_scheduler_type: cosine
|
| 37 |
+
warmup_ratio: 0.1
|
| 38 |
+
bf16: true
|
| 39 |
+
ddp_timeout: 180000000
|
| 40 |
+
flash_attn: disabled
|
| 41 |
+
|
| 42 |
+
### eval
|
| 43 |
+
val_size: 0.1
|
| 44 |
+
per_device_eval_batch_size: 4
|
| 45 |
+
eval_strategy: steps
|
| 46 |
+
eval_steps: 50
|
adapter_config.json
CHANGED
|
@@ -21,12 +21,12 @@
|
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
"o_proj",
|
| 24 |
-
"
|
| 25 |
-
"up_proj",
|
| 26 |
-
"v_proj",
|
| 27 |
-
"gate_proj",
|
| 28 |
"down_proj",
|
| 29 |
-
"
|
|
|
|
|
|
|
|
|
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
|
|
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
"o_proj",
|
| 24 |
+
"q_proj",
|
|
|
|
|
|
|
|
|
|
| 25 |
"down_proj",
|
| 26 |
+
"gate_proj",
|
| 27 |
+
"v_proj",
|
| 28 |
+
"up_proj",
|
| 29 |
+
"k_proj"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 108113968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d703523c94424012be4ffe7395f2897c9d6e9e85723b40d6675e4dca3c2f1e8
|
| 3 |
size 108113968
|
llama3_lora_sft.yaml
CHANGED
|
@@ -18,7 +18,7 @@ overwrite_cache: true
|
|
| 18 |
preprocessing_num_workers: 16
|
| 19 |
|
| 20 |
### output
|
| 21 |
-
output_dir: saves/
|
| 22 |
logging_steps: 10
|
| 23 |
save_steps: 50
|
| 24 |
plot_loss: true
|
|
@@ -29,18 +29,18 @@ hub_model_id: chchen/gemma-2-9b-it-sft-1000
|
|
| 29 |
load_best_model_at_end: true
|
| 30 |
|
| 31 |
### train
|
| 32 |
-
per_device_train_batch_size:
|
| 33 |
-
gradient_accumulation_steps:
|
| 34 |
learning_rate: 1.0e-4
|
| 35 |
num_train_epochs: 10.0
|
| 36 |
lr_scheduler_type: cosine
|
| 37 |
warmup_ratio: 0.1
|
| 38 |
bf16: true
|
| 39 |
ddp_timeout: 180000000
|
| 40 |
-
|
| 41 |
|
| 42 |
### eval
|
| 43 |
val_size: 0.1
|
| 44 |
-
per_device_eval_batch_size:
|
| 45 |
eval_strategy: steps
|
| 46 |
eval_steps: 50
|
|
|
|
| 18 |
preprocessing_num_workers: 16
|
| 19 |
|
| 20 |
### output
|
| 21 |
+
output_dir: saves/gemma-2-9b-it/sft-1000/train
|
| 22 |
logging_steps: 10
|
| 23 |
save_steps: 50
|
| 24 |
plot_loss: true
|
|
|
|
| 29 |
load_best_model_at_end: true
|
| 30 |
|
| 31 |
### train
|
| 32 |
+
per_device_train_batch_size: 4
|
| 33 |
+
gradient_accumulation_steps: 8
|
| 34 |
learning_rate: 1.0e-4
|
| 35 |
num_train_epochs: 10.0
|
| 36 |
lr_scheduler_type: cosine
|
| 37 |
warmup_ratio: 0.1
|
| 38 |
bf16: true
|
| 39 |
ddp_timeout: 180000000
|
| 40 |
+
flash_attn: disabled
|
| 41 |
|
| 42 |
### eval
|
| 43 |
val_size: 0.1
|
| 44 |
+
per_device_eval_batch_size: 4
|
| 45 |
eval_strategy: steps
|
| 46 |
eval_steps: 50
|
tokenizer_config.json
CHANGED
|
@@ -2003,7 +2003,7 @@
|
|
| 2003 |
"chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
|
| 2004 |
"clean_up_tokenization_spaces": false,
|
| 2005 |
"eos_token": "<eos>",
|
| 2006 |
-
"model_max_length":
|
| 2007 |
"pad_token": "<pad>",
|
| 2008 |
"padding_side": "right",
|
| 2009 |
"sp_model_kwargs": {},
|
|
|
|
| 2003 |
"chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
|
| 2004 |
"clean_up_tokenization_spaces": false,
|
| 2005 |
"eos_token": "<eos>",
|
| 2006 |
+
"model_max_length": 1024,
|
| 2007 |
"pad_token": "<pad>",
|
| 2008 |
"padding_side": "right",
|
| 2009 |
"sp_model_kwargs": {},
|
trainer_log.jsonl
CHANGED
|
@@ -1,68 +1,6 @@
|
|
| 1 |
-
{"current_steps": 10, "total_steps":
|
| 2 |
-
{"current_steps": 20, "total_steps":
|
| 3 |
-
{"current_steps": 30, "total_steps":
|
| 4 |
-
{"current_steps": 40, "total_steps":
|
| 5 |
-
{"current_steps": 50, "total_steps":
|
| 6 |
-
{"current_steps": 50, "total_steps":
|
| 7 |
-
{"current_steps": 60, "total_steps": 560, "loss": 0.1001, "lr": 9.998445910004082e-05, "epoch": 1.0666666666666667, "percentage": 10.71, "elapsed_time": "0:10:01", "remaining_time": "1:23:29"}
|
| 8 |
-
{"current_steps": 70, "total_steps": 560, "loss": 0.0474, "lr": 9.980973490458728e-05, "epoch": 1.2444444444444445, "percentage": 12.5, "elapsed_time": "0:11:35", "remaining_time": "1:21:11"}
|
| 9 |
-
{"current_steps": 80, "total_steps": 560, "loss": 0.0467, "lr": 9.944154131125642e-05, "epoch": 1.4222222222222223, "percentage": 14.29, "elapsed_time": "0:13:11", "remaining_time": "1:19:07"}
|
| 10 |
-
{"current_steps": 90, "total_steps": 560, "loss": 0.104, "lr": 9.888130844596524e-05, "epoch": 1.6, "percentage": 16.07, "elapsed_time": "0:14:45", "remaining_time": "1:17:04"}
|
| 11 |
-
{"current_steps": 100, "total_steps": 560, "loss": 0.0381, "lr": 9.81312123475006e-05, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:16:21", "remaining_time": "1:15:13"}
|
| 12 |
-
{"current_steps": 100, "total_steps": 560, "eval_loss": 0.05637728050351143, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:16:31", "remaining_time": "1:16:01"}
|
| 13 |
-
{"current_steps": 110, "total_steps": 560, "loss": 0.0982, "lr": 9.719416651541839e-05, "epoch": 1.9555555555555557, "percentage": 19.64, "elapsed_time": "0:18:11", "remaining_time": "1:14:24"}
|
| 14 |
-
{"current_steps": 120, "total_steps": 560, "loss": 0.0354, "lr": 9.607381059352038e-05, "epoch": 2.1333333333333333, "percentage": 21.43, "elapsed_time": "0:19:46", "remaining_time": "1:12:29"}
|
| 15 |
-
{"current_steps": 130, "total_steps": 560, "loss": 0.0235, "lr": 9.477449623286505e-05, "epoch": 2.311111111111111, "percentage": 23.21, "elapsed_time": "0:21:20", "remaining_time": "1:10:35"}
|
| 16 |
-
{"current_steps": 140, "total_steps": 560, "loss": 0.0447, "lr": 9.330127018922194e-05, "epoch": 2.488888888888889, "percentage": 25.0, "elapsed_time": "0:22:54", "remaining_time": "1:08:44"}
|
| 17 |
-
{"current_steps": 150, "total_steps": 560, "loss": 0.0357, "lr": 9.165985472062246e-05, "epoch": 2.6666666666666665, "percentage": 26.79, "elapsed_time": "0:24:29", "remaining_time": "1:06:57"}
|
| 18 |
-
{"current_steps": 150, "total_steps": 560, "eval_loss": 0.0641837865114212, "epoch": 2.6666666666666665, "percentage": 26.79, "elapsed_time": "0:24:39", "remaining_time": "1:07:25"}
|
| 19 |
-
{"current_steps": 160, "total_steps": 560, "loss": 0.0244, "lr": 8.985662536114613e-05, "epoch": 2.8444444444444446, "percentage": 28.57, "elapsed_time": "0:26:22", "remaining_time": "1:05:56"}
|
| 20 |
-
{"current_steps": 170, "total_steps": 560, "loss": 0.0279, "lr": 8.789858615727265e-05, "epoch": 3.022222222222222, "percentage": 30.36, "elapsed_time": "0:27:58", "remaining_time": "1:04:11"}
|
| 21 |
-
{"current_steps": 180, "total_steps": 560, "loss": 0.0101, "lr": 8.579334246298593e-05, "epoch": 3.2, "percentage": 32.14, "elapsed_time": "0:29:34", "remaining_time": "1:02:26"}
|
| 22 |
-
{"current_steps": 190, "total_steps": 560, "loss": 0.0122, "lr": 8.354907139929851e-05, "epoch": 3.3777777777777778, "percentage": 33.93, "elapsed_time": "0:31:09", "remaining_time": "1:00:40"}
|
| 23 |
-
{"current_steps": 200, "total_steps": 560, "loss": 0.0104, "lr": 8.117449009293668e-05, "epoch": 3.5555555555555554, "percentage": 35.71, "elapsed_time": "0:32:44", "remaining_time": "0:58:55"}
|
| 24 |
-
{"current_steps": 200, "total_steps": 560, "eval_loss": 0.08028902113437653, "epoch": 3.5555555555555554, "percentage": 35.71, "elapsed_time": "0:32:54", "remaining_time": "0:59:14"}
|
| 25 |
-
{"current_steps": 210, "total_steps": 560, "loss": 0.0075, "lr": 7.86788218175523e-05, "epoch": 3.7333333333333334, "percentage": 37.5, "elapsed_time": "0:34:35", "remaining_time": "0:57:39"}
|
| 26 |
-
{"current_steps": 220, "total_steps": 560, "loss": 0.0197, "lr": 7.60717601689749e-05, "epoch": 3.911111111111111, "percentage": 39.29, "elapsed_time": "0:36:08", "remaining_time": "0:55:51"}
|
| 27 |
-
{"current_steps": 230, "total_steps": 560, "loss": 0.0078, "lr": 7.33634314136531e-05, "epoch": 4.088888888888889, "percentage": 41.07, "elapsed_time": "0:37:45", "remaining_time": "0:54:10"}
|
| 28 |
-
{"current_steps": 240, "total_steps": 560, "loss": 0.0052, "lr": 7.056435515653059e-05, "epoch": 4.266666666666667, "percentage": 42.86, "elapsed_time": "0:39:19", "remaining_time": "0:52:26"}
|
| 29 |
-
{"current_steps": 250, "total_steps": 560, "loss": 0.0108, "lr": 6.768540348112907e-05, "epoch": 4.444444444444445, "percentage": 44.64, "elapsed_time": "0:40:55", "remaining_time": "0:50:45"}
|
| 30 |
-
{"current_steps": 250, "total_steps": 560, "eval_loss": 0.078793466091156, "epoch": 4.444444444444445, "percentage": 44.64, "elapsed_time": "0:41:05", "remaining_time": "0:50:57"}
|
| 31 |
-
{"current_steps": 260, "total_steps": 560, "loss": 0.0024, "lr": 6.473775872054521e-05, "epoch": 4.622222222222222, "percentage": 46.43, "elapsed_time": "0:42:48", "remaining_time": "0:49:23"}
|
| 32 |
-
{"current_steps": 270, "total_steps": 560, "loss": 0.0052, "lr": 6.173287002338577e-05, "epoch": 4.8, "percentage": 48.21, "elapsed_time": "0:44:23", "remaining_time": "0:47:41"}
|
| 33 |
-
{"current_steps": 280, "total_steps": 560, "loss": 0.0032, "lr": 5.868240888334653e-05, "epoch": 4.977777777777778, "percentage": 50.0, "elapsed_time": "0:46:00", "remaining_time": "0:46:00"}
|
| 34 |
-
{"current_steps": 290, "total_steps": 560, "loss": 0.0048, "lr": 5.559822380516539e-05, "epoch": 5.155555555555556, "percentage": 51.79, "elapsed_time": "0:47:35", "remaining_time": "0:44:18"}
|
| 35 |
-
{"current_steps": 300, "total_steps": 560, "loss": 0.0008, "lr": 5.249229428303486e-05, "epoch": 5.333333333333333, "percentage": 53.57, "elapsed_time": "0:49:11", "remaining_time": "0:42:37"}
|
| 36 |
-
{"current_steps": 300, "total_steps": 560, "eval_loss": 0.08963307738304138, "epoch": 5.333333333333333, "percentage": 53.57, "elapsed_time": "0:49:21", "remaining_time": "0:42:46"}
|
| 37 |
-
{"current_steps": 310, "total_steps": 560, "loss": 0.0003, "lr": 4.9376684270229254e-05, "epoch": 5.511111111111111, "percentage": 55.36, "elapsed_time": "0:51:02", "remaining_time": "0:41:09"}
|
| 38 |
-
{"current_steps": 320, "total_steps": 560, "loss": 0.0006, "lr": 4.626349532067879e-05, "epoch": 5.688888888888889, "percentage": 57.14, "elapsed_time": "0:52:37", "remaining_time": "0:39:28"}
|
| 39 |
-
{"current_steps": 330, "total_steps": 560, "loss": 0.0007, "lr": 4.316481958449634e-05, "epoch": 5.866666666666667, "percentage": 58.93, "elapsed_time": "0:54:12", "remaining_time": "0:37:46"}
|
| 40 |
-
{"current_steps": 340, "total_steps": 560, "loss": 0.001, "lr": 4.0092692840030134e-05, "epoch": 6.044444444444444, "percentage": 60.71, "elapsed_time": "0:55:47", "remaining_time": "0:36:06"}
|
| 41 |
-
{"current_steps": 350, "total_steps": 560, "loss": 0.0001, "lr": 3.705904774487396e-05, "epoch": 6.222222222222222, "percentage": 62.5, "elapsed_time": "0:57:23", "remaining_time": "0:34:25"}
|
| 42 |
-
{"current_steps": 350, "total_steps": 560, "eval_loss": 0.10594847053289413, "epoch": 6.222222222222222, "percentage": 62.5, "elapsed_time": "0:57:32", "remaining_time": "0:34:31"}
|
| 43 |
-
{"current_steps": 360, "total_steps": 560, "loss": 0.0002, "lr": 3.4075667487415785e-05, "epoch": 6.4, "percentage": 64.29, "elapsed_time": "0:59:14", "remaining_time": "0:32:54"}
|
| 44 |
-
{"current_steps": 370, "total_steps": 560, "loss": 0.0004, "lr": 3.115414001894974e-05, "epoch": 6.5777777777777775, "percentage": 66.07, "elapsed_time": "1:00:49", "remaining_time": "0:31:13"}
|
| 45 |
-
{"current_steps": 380, "total_steps": 560, "loss": 0.0001, "lr": 2.8305813044122097e-05, "epoch": 6.7555555555555555, "percentage": 67.86, "elapsed_time": "1:02:24", "remaining_time": "0:29:33"}
|
| 46 |
-
{"current_steps": 390, "total_steps": 560, "loss": 0.0001, "lr": 2.5541749944535554e-05, "epoch": 6.933333333333334, "percentage": 69.64, "elapsed_time": "1:03:59", "remaining_time": "0:27:53"}
|
| 47 |
-
{"current_steps": 400, "total_steps": 560, "loss": 0.0001, "lr": 2.2872686806712035e-05, "epoch": 7.111111111111111, "percentage": 71.43, "elapsed_time": "1:05:34", "remaining_time": "0:26:13"}
|
| 48 |
-
{"current_steps": 400, "total_steps": 560, "eval_loss": 0.11472605913877487, "epoch": 7.111111111111111, "percentage": 71.43, "elapsed_time": "1:05:43", "remaining_time": "0:26:17"}
|
| 49 |
-
{"current_steps": 410, "total_steps": 560, "loss": 0.0, "lr": 2.0308990721324927e-05, "epoch": 7.288888888888889, "percentage": 73.21, "elapsed_time": "1:07:25", "remaining_time": "0:24:39"}
|
| 50 |
-
{"current_steps": 420, "total_steps": 560, "loss": 0.0001, "lr": 1.7860619515673033e-05, "epoch": 7.466666666666667, "percentage": 75.0, "elapsed_time": "1:08:59", "remaining_time": "0:22:59"}
|
| 51 |
-
{"current_steps": 430, "total_steps": 560, "loss": 0.0001, "lr": 1.553708307580265e-05, "epoch": 7.644444444444445, "percentage": 76.79, "elapsed_time": "1:10:34", "remaining_time": "0:21:20"}
|
| 52 |
-
{"current_steps": 440, "total_steps": 560, "loss": 0.0001, "lr": 1.3347406408508695e-05, "epoch": 7.822222222222222, "percentage": 78.57, "elapsed_time": "1:12:10", "remaining_time": "0:19:41"}
|
| 53 |
-
{"current_steps": 450, "total_steps": 560, "loss": 0.0001, "lr": 1.130009458668863e-05, "epoch": 8.0, "percentage": 80.36, "elapsed_time": "1:13:46", "remaining_time": "0:18:01"}
|
| 54 |
-
{"current_steps": 450, "total_steps": 560, "eval_loss": 0.11791631579399109, "epoch": 8.0, "percentage": 80.36, "elapsed_time": "1:13:55", "remaining_time": "0:18:04"}
|
| 55 |
-
{"current_steps": 460, "total_steps": 560, "loss": 0.0, "lr": 9.403099714207175e-06, "epoch": 8.177777777777777, "percentage": 82.14, "elapsed_time": "1:15:39", "remaining_time": "0:16:26"}
|
| 56 |
-
{"current_steps": 470, "total_steps": 560, "loss": 0.0, "lr": 7.663790038585793e-06, "epoch": 8.355555555555556, "percentage": 83.93, "elapsed_time": "1:17:15", "remaining_time": "0:14:47"}
|
| 57 |
-
{"current_steps": 480, "total_steps": 560, "loss": 0.0001, "lr": 6.088921331488568e-06, "epoch": 8.533333333333333, "percentage": 85.71, "elapsed_time": "1:18:50", "remaining_time": "0:13:08"}
|
| 58 |
-
{"current_steps": 490, "total_steps": 560, "loss": 0.0001, "lr": 4.684610648167503e-06, "epoch": 8.71111111111111, "percentage": 87.5, "elapsed_time": "1:20:27", "remaining_time": "0:11:29"}
|
| 59 |
-
{"current_steps": 500, "total_steps": 560, "loss": 0.0001, "lr": 3.4563125677897932e-06, "epoch": 8.88888888888889, "percentage": 89.29, "elapsed_time": "1:22:03", "remaining_time": "0:09:50"}
|
| 60 |
-
{"current_steps": 500, "total_steps": 560, "eval_loss": 0.11865098774433136, "epoch": 8.88888888888889, "percentage": 89.29, "elapsed_time": "1:22:14", "remaining_time": "0:09:52"}
|
| 61 |
-
{"current_steps": 510, "total_steps": 560, "loss": 0.0001, "lr": 2.408798006933882e-06, "epoch": 9.066666666666666, "percentage": 91.07, "elapsed_time": "1:23:56", "remaining_time": "0:08:13"}
|
| 62 |
-
{"current_steps": 520, "total_steps": 560, "loss": 0.0001, "lr": 1.5461356885461075e-06, "epoch": 9.244444444444444, "percentage": 92.86, "elapsed_time": "1:25:32", "remaining_time": "0:06:34"}
|
| 63 |
-
{"current_steps": 530, "total_steps": 560, "loss": 0.0, "lr": 8.716763383355864e-07, "epoch": 9.422222222222222, "percentage": 94.64, "elapsed_time": "1:27:08", "remaining_time": "0:04:55"}
|
| 64 |
-
{"current_steps": 540, "total_steps": 560, "loss": 0.0001, "lr": 3.8803966999139684e-07, "epoch": 9.6, "percentage": 96.43, "elapsed_time": "1:28:44", "remaining_time": "0:03:17"}
|
| 65 |
-
{"current_steps": 550, "total_steps": 560, "loss": 0.0001, "lr": 9.710420977340762e-08, "epoch": 9.777777777777779, "percentage": 98.21, "elapsed_time": "1:30:20", "remaining_time": "0:01:38"}
|
| 66 |
-
{"current_steps": 550, "total_steps": 560, "eval_loss": 0.11960210651159286, "epoch": 9.777777777777779, "percentage": 98.21, "elapsed_time": "1:30:30", "remaining_time": "0:01:38"}
|
| 67 |
-
{"current_steps": 560, "total_steps": 560, "loss": 0.0001, "lr": 0.0, "epoch": 9.955555555555556, "percentage": 100.0, "elapsed_time": "1:32:13", "remaining_time": "0:00:00"}
|
| 68 |
-
{"current_steps": 560, "total_steps": 560, "epoch": 9.955555555555556, "percentage": 100.0, "elapsed_time": "1:32:19", "remaining_time": "0:00:00"}
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 280, "loss": 2.3562, "lr": 3.571428571428572e-05, "epoch": 0.35555555555555557, "percentage": 3.57, "elapsed_time": "0:00:37", "remaining_time": "0:16:55"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 280, "loss": 0.6405, "lr": 7.142857142857143e-05, "epoch": 0.7111111111111111, "percentage": 7.14, "elapsed_time": "0:01:14", "remaining_time": "0:16:03"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 280, "loss": 0.1061, "lr": 9.998445910004082e-05, "epoch": 1.0666666666666667, "percentage": 10.71, "elapsed_time": "0:01:51", "remaining_time": "0:15:33"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 280, "loss": 0.0587, "lr": 9.944154131125642e-05, "epoch": 1.4222222222222223, "percentage": 14.29, "elapsed_time": "0:02:28", "remaining_time": "0:14:51"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 280, "loss": 0.0719, "lr": 9.81312123475006e-05, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:03:06", "remaining_time": "0:14:15"}
|
| 6 |
+
{"current_steps": 50, "total_steps": 280, "eval_loss": 0.07298772037029266, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:03:09", "remaining_time": "0:14:31"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5560
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c44d638eae648e387b2d75777d093e6b7f0dd07e2de6dc9ccec7f8bd934b7090
|
| 3 |
size 5560
|