chchen committed on
Commit
eb74b10
·
verified ·
1 Parent(s): fe8c178

Training in progress, step 50

Browse files
.ipynb_checkpoints/llama3_lora_sft-checkpoint.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### model
2
+ model_name_or_path: google/gemma-2-9b-it
3
+ trust_remote_code: true
4
+
5
+ ### method
6
+ stage: sft
7
+ do_train: true
8
+ finetuning_type: lora
9
+ lora_target: all
10
+
11
+ ### dataset
12
+ dataset: bct_non_cot_sft_1000
13
+ dataset_dir: data_private
14
+ template: gemma
15
+ cutoff_len: 1024
16
+ # max_samples: 1000
17
+ overwrite_cache: true
18
+ preprocessing_num_workers: 16
19
+
20
+ ### output
21
+ output_dir: saves/gemma-2-9b-it/sft-1000/train
22
+ logging_steps: 10
23
+ save_steps: 50
24
+ plot_loss: true
25
+ overwrite_output_dir: true
26
+ save_total_limit: 3
27
+ push_to_hub: true
28
+ hub_model_id: chchen/gemma-2-9b-it-sft-1000
29
+ load_best_model_at_end: true
30
+
31
+ ### train
32
+ per_device_train_batch_size: 4
33
+ gradient_accumulation_steps: 8
34
+ learning_rate: 1.0e-4
35
+ num_train_epochs: 10.0
36
+ lr_scheduler_type: cosine
37
+ warmup_ratio: 0.1
38
+ bf16: true
39
+ ddp_timeout: 180000000
40
+ flash_attn: disabled
41
+
42
+ ### eval
43
+ val_size: 0.1
44
+ per_device_eval_batch_size: 4
45
+ eval_strategy: steps
46
+ eval_steps: 50
adapter_config.json CHANGED
@@ -21,12 +21,12 @@
21
  "revision": null,
22
  "target_modules": [
23
  "o_proj",
24
- "k_proj",
25
- "up_proj",
26
- "v_proj",
27
- "gate_proj",
28
  "down_proj",
29
- "q_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "o_proj",
24
+ "q_proj",
 
 
 
25
  "down_proj",
26
+ "gate_proj",
27
+ "v_proj",
28
+ "up_proj",
29
+ "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b72b5a4a57e9b32fcac206d241c92e5f8eab15e9be4c0313c975735d554608b2
3
  size 108113968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d703523c94424012be4ffe7395f2897c9d6e9e85723b40d6675e4dca3c2f1e8
3
  size 108113968
llama3_lora_sft.yaml CHANGED
@@ -18,7 +18,7 @@ overwrite_cache: true
18
  preprocessing_num_workers: 16
19
 
20
  ### output
21
- output_dir: saves/sycophancy/gemma-2-9b-it/sft-1000/train
22
  logging_steps: 10
23
  save_steps: 50
24
  plot_loss: true
@@ -29,18 +29,18 @@ hub_model_id: chchen/gemma-2-9b-it-sft-1000
29
  load_best_model_at_end: true
30
 
31
  ### train
32
- per_device_train_batch_size: 1
33
- gradient_accumulation_steps: 16
34
  learning_rate: 1.0e-4
35
  num_train_epochs: 10.0
36
  lr_scheduler_type: cosine
37
  warmup_ratio: 0.1
38
  bf16: true
39
  ddp_timeout: 180000000
40
- enable_liger_kernel: true
41
 
42
  ### eval
43
  val_size: 0.1
44
- per_device_eval_batch_size: 1
45
  eval_strategy: steps
46
  eval_steps: 50
 
18
  preprocessing_num_workers: 16
19
 
20
  ### output
21
+ output_dir: saves/gemma-2-9b-it/sft-1000/train
22
  logging_steps: 10
23
  save_steps: 50
24
  plot_loss: true
 
29
  load_best_model_at_end: true
30
 
31
  ### train
32
+ per_device_train_batch_size: 4
33
+ gradient_accumulation_steps: 8
34
  learning_rate: 1.0e-4
35
  num_train_epochs: 10.0
36
  lr_scheduler_type: cosine
37
  warmup_ratio: 0.1
38
  bf16: true
39
  ddp_timeout: 180000000
40
+ flash_attn: disabled
41
 
42
  ### eval
43
  val_size: 0.1
44
+ per_device_eval_batch_size: 4
45
  eval_strategy: steps
46
  eval_steps: 50
tokenizer_config.json CHANGED
@@ -2003,7 +2003,7 @@
2003
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
2006
- "model_max_length": 1000000000000000019884624838656,
2007
  "pad_token": "<pad>",
2008
  "padding_side": "right",
2009
  "sp_model_kwargs": {},
 
2003
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
2006
+ "model_max_length": 1024,
2007
  "pad_token": "<pad>",
2008
  "padding_side": "right",
2009
  "sp_model_kwargs": {},
trainer_log.jsonl CHANGED
@@ -1,68 +1,6 @@
1
- {"current_steps": 10, "total_steps": 560, "loss": 2.5118, "lr": 1.785714285714286e-05, "epoch": 0.17777777777777778, "percentage": 1.79, "elapsed_time": "0:01:45", "remaining_time": "1:36:52"}
2
- {"current_steps": 20, "total_steps": 560, "loss": 1.3514, "lr": 3.571428571428572e-05, "epoch": 0.35555555555555557, "percentage": 3.57, "elapsed_time": "0:03:23", "remaining_time": "1:31:21"}
3
- {"current_steps": 30, "total_steps": 560, "loss": 0.2902, "lr": 5.3571428571428575e-05, "epoch": 0.5333333333333333, "percentage": 5.36, "elapsed_time": "0:05:00", "remaining_time": "1:28:28"}
4
- {"current_steps": 40, "total_steps": 560, "loss": 0.075, "lr": 7.142857142857143e-05, "epoch": 0.7111111111111111, "percentage": 7.14, "elapsed_time": "0:06:35", "remaining_time": "1:25:37"}
5
- {"current_steps": 50, "total_steps": 560, "loss": 0.0799, "lr": 8.92857142857143e-05, "epoch": 0.8888888888888888, "percentage": 8.93, "elapsed_time": "0:08:10", "remaining_time": "1:23:20"}
6
- {"current_steps": 50, "total_steps": 560, "eval_loss": 0.05876053497195244, "epoch": 0.8888888888888888, "percentage": 8.93, "elapsed_time": "0:08:20", "remaining_time": "1:25:03"}
7
- {"current_steps": 60, "total_steps": 560, "loss": 0.1001, "lr": 9.998445910004082e-05, "epoch": 1.0666666666666667, "percentage": 10.71, "elapsed_time": "0:10:01", "remaining_time": "1:23:29"}
8
- {"current_steps": 70, "total_steps": 560, "loss": 0.0474, "lr": 9.980973490458728e-05, "epoch": 1.2444444444444445, "percentage": 12.5, "elapsed_time": "0:11:35", "remaining_time": "1:21:11"}
9
- {"current_steps": 80, "total_steps": 560, "loss": 0.0467, "lr": 9.944154131125642e-05, "epoch": 1.4222222222222223, "percentage": 14.29, "elapsed_time": "0:13:11", "remaining_time": "1:19:07"}
10
- {"current_steps": 90, "total_steps": 560, "loss": 0.104, "lr": 9.888130844596524e-05, "epoch": 1.6, "percentage": 16.07, "elapsed_time": "0:14:45", "remaining_time": "1:17:04"}
11
- {"current_steps": 100, "total_steps": 560, "loss": 0.0381, "lr": 9.81312123475006e-05, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:16:21", "remaining_time": "1:15:13"}
12
- {"current_steps": 100, "total_steps": 560, "eval_loss": 0.05637728050351143, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:16:31", "remaining_time": "1:16:01"}
13
- {"current_steps": 110, "total_steps": 560, "loss": 0.0982, "lr": 9.719416651541839e-05, "epoch": 1.9555555555555557, "percentage": 19.64, "elapsed_time": "0:18:11", "remaining_time": "1:14:24"}
14
- {"current_steps": 120, "total_steps": 560, "loss": 0.0354, "lr": 9.607381059352038e-05, "epoch": 2.1333333333333333, "percentage": 21.43, "elapsed_time": "0:19:46", "remaining_time": "1:12:29"}
15
- {"current_steps": 130, "total_steps": 560, "loss": 0.0235, "lr": 9.477449623286505e-05, "epoch": 2.311111111111111, "percentage": 23.21, "elapsed_time": "0:21:20", "remaining_time": "1:10:35"}
16
- {"current_steps": 140, "total_steps": 560, "loss": 0.0447, "lr": 9.330127018922194e-05, "epoch": 2.488888888888889, "percentage": 25.0, "elapsed_time": "0:22:54", "remaining_time": "1:08:44"}
17
- {"current_steps": 150, "total_steps": 560, "loss": 0.0357, "lr": 9.165985472062246e-05, "epoch": 2.6666666666666665, "percentage": 26.79, "elapsed_time": "0:24:29", "remaining_time": "1:06:57"}
18
- {"current_steps": 150, "total_steps": 560, "eval_loss": 0.0641837865114212, "epoch": 2.6666666666666665, "percentage": 26.79, "elapsed_time": "0:24:39", "remaining_time": "1:07:25"}
19
- {"current_steps": 160, "total_steps": 560, "loss": 0.0244, "lr": 8.985662536114613e-05, "epoch": 2.8444444444444446, "percentage": 28.57, "elapsed_time": "0:26:22", "remaining_time": "1:05:56"}
20
- {"current_steps": 170, "total_steps": 560, "loss": 0.0279, "lr": 8.789858615727265e-05, "epoch": 3.022222222222222, "percentage": 30.36, "elapsed_time": "0:27:58", "remaining_time": "1:04:11"}
21
- {"current_steps": 180, "total_steps": 560, "loss": 0.0101, "lr": 8.579334246298593e-05, "epoch": 3.2, "percentage": 32.14, "elapsed_time": "0:29:34", "remaining_time": "1:02:26"}
22
- {"current_steps": 190, "total_steps": 560, "loss": 0.0122, "lr": 8.354907139929851e-05, "epoch": 3.3777777777777778, "percentage": 33.93, "elapsed_time": "0:31:09", "remaining_time": "1:00:40"}
23
- {"current_steps": 200, "total_steps": 560, "loss": 0.0104, "lr": 8.117449009293668e-05, "epoch": 3.5555555555555554, "percentage": 35.71, "elapsed_time": "0:32:44", "remaining_time": "0:58:55"}
24
- {"current_steps": 200, "total_steps": 560, "eval_loss": 0.08028902113437653, "epoch": 3.5555555555555554, "percentage": 35.71, "elapsed_time": "0:32:54", "remaining_time": "0:59:14"}
25
- {"current_steps": 210, "total_steps": 560, "loss": 0.0075, "lr": 7.86788218175523e-05, "epoch": 3.7333333333333334, "percentage": 37.5, "elapsed_time": "0:34:35", "remaining_time": "0:57:39"}
26
- {"current_steps": 220, "total_steps": 560, "loss": 0.0197, "lr": 7.60717601689749e-05, "epoch": 3.911111111111111, "percentage": 39.29, "elapsed_time": "0:36:08", "remaining_time": "0:55:51"}
27
- {"current_steps": 230, "total_steps": 560, "loss": 0.0078, "lr": 7.33634314136531e-05, "epoch": 4.088888888888889, "percentage": 41.07, "elapsed_time": "0:37:45", "remaining_time": "0:54:10"}
28
- {"current_steps": 240, "total_steps": 560, "loss": 0.0052, "lr": 7.056435515653059e-05, "epoch": 4.266666666666667, "percentage": 42.86, "elapsed_time": "0:39:19", "remaining_time": "0:52:26"}
29
- {"current_steps": 250, "total_steps": 560, "loss": 0.0108, "lr": 6.768540348112907e-05, "epoch": 4.444444444444445, "percentage": 44.64, "elapsed_time": "0:40:55", "remaining_time": "0:50:45"}
30
- {"current_steps": 250, "total_steps": 560, "eval_loss": 0.078793466091156, "epoch": 4.444444444444445, "percentage": 44.64, "elapsed_time": "0:41:05", "remaining_time": "0:50:57"}
31
- {"current_steps": 260, "total_steps": 560, "loss": 0.0024, "lr": 6.473775872054521e-05, "epoch": 4.622222222222222, "percentage": 46.43, "elapsed_time": "0:42:48", "remaining_time": "0:49:23"}
32
- {"current_steps": 270, "total_steps": 560, "loss": 0.0052, "lr": 6.173287002338577e-05, "epoch": 4.8, "percentage": 48.21, "elapsed_time": "0:44:23", "remaining_time": "0:47:41"}
33
- {"current_steps": 280, "total_steps": 560, "loss": 0.0032, "lr": 5.868240888334653e-05, "epoch": 4.977777777777778, "percentage": 50.0, "elapsed_time": "0:46:00", "remaining_time": "0:46:00"}
34
- {"current_steps": 290, "total_steps": 560, "loss": 0.0048, "lr": 5.559822380516539e-05, "epoch": 5.155555555555556, "percentage": 51.79, "elapsed_time": "0:47:35", "remaining_time": "0:44:18"}
35
- {"current_steps": 300, "total_steps": 560, "loss": 0.0008, "lr": 5.249229428303486e-05, "epoch": 5.333333333333333, "percentage": 53.57, "elapsed_time": "0:49:11", "remaining_time": "0:42:37"}
36
- {"current_steps": 300, "total_steps": 560, "eval_loss": 0.08963307738304138, "epoch": 5.333333333333333, "percentage": 53.57, "elapsed_time": "0:49:21", "remaining_time": "0:42:46"}
37
- {"current_steps": 310, "total_steps": 560, "loss": 0.0003, "lr": 4.9376684270229254e-05, "epoch": 5.511111111111111, "percentage": 55.36, "elapsed_time": "0:51:02", "remaining_time": "0:41:09"}
38
- {"current_steps": 320, "total_steps": 560, "loss": 0.0006, "lr": 4.626349532067879e-05, "epoch": 5.688888888888889, "percentage": 57.14, "elapsed_time": "0:52:37", "remaining_time": "0:39:28"}
39
- {"current_steps": 330, "total_steps": 560, "loss": 0.0007, "lr": 4.316481958449634e-05, "epoch": 5.866666666666667, "percentage": 58.93, "elapsed_time": "0:54:12", "remaining_time": "0:37:46"}
40
- {"current_steps": 340, "total_steps": 560, "loss": 0.001, "lr": 4.0092692840030134e-05, "epoch": 6.044444444444444, "percentage": 60.71, "elapsed_time": "0:55:47", "remaining_time": "0:36:06"}
41
- {"current_steps": 350, "total_steps": 560, "loss": 0.0001, "lr": 3.705904774487396e-05, "epoch": 6.222222222222222, "percentage": 62.5, "elapsed_time": "0:57:23", "remaining_time": "0:34:25"}
42
- {"current_steps": 350, "total_steps": 560, "eval_loss": 0.10594847053289413, "epoch": 6.222222222222222, "percentage": 62.5, "elapsed_time": "0:57:32", "remaining_time": "0:34:31"}
43
- {"current_steps": 360, "total_steps": 560, "loss": 0.0002, "lr": 3.4075667487415785e-05, "epoch": 6.4, "percentage": 64.29, "elapsed_time": "0:59:14", "remaining_time": "0:32:54"}
44
- {"current_steps": 370, "total_steps": 560, "loss": 0.0004, "lr": 3.115414001894974e-05, "epoch": 6.5777777777777775, "percentage": 66.07, "elapsed_time": "1:00:49", "remaining_time": "0:31:13"}
45
- {"current_steps": 380, "total_steps": 560, "loss": 0.0001, "lr": 2.8305813044122097e-05, "epoch": 6.7555555555555555, "percentage": 67.86, "elapsed_time": "1:02:24", "remaining_time": "0:29:33"}
46
- {"current_steps": 390, "total_steps": 560, "loss": 0.0001, "lr": 2.5541749944535554e-05, "epoch": 6.933333333333334, "percentage": 69.64, "elapsed_time": "1:03:59", "remaining_time": "0:27:53"}
47
- {"current_steps": 400, "total_steps": 560, "loss": 0.0001, "lr": 2.2872686806712035e-05, "epoch": 7.111111111111111, "percentage": 71.43, "elapsed_time": "1:05:34", "remaining_time": "0:26:13"}
48
- {"current_steps": 400, "total_steps": 560, "eval_loss": 0.11472605913877487, "epoch": 7.111111111111111, "percentage": 71.43, "elapsed_time": "1:05:43", "remaining_time": "0:26:17"}
49
- {"current_steps": 410, "total_steps": 560, "loss": 0.0, "lr": 2.0308990721324927e-05, "epoch": 7.288888888888889, "percentage": 73.21, "elapsed_time": "1:07:25", "remaining_time": "0:24:39"}
50
- {"current_steps": 420, "total_steps": 560, "loss": 0.0001, "lr": 1.7860619515673033e-05, "epoch": 7.466666666666667, "percentage": 75.0, "elapsed_time": "1:08:59", "remaining_time": "0:22:59"}
51
- {"current_steps": 430, "total_steps": 560, "loss": 0.0001, "lr": 1.553708307580265e-05, "epoch": 7.644444444444445, "percentage": 76.79, "elapsed_time": "1:10:34", "remaining_time": "0:21:20"}
52
- {"current_steps": 440, "total_steps": 560, "loss": 0.0001, "lr": 1.3347406408508695e-05, "epoch": 7.822222222222222, "percentage": 78.57, "elapsed_time": "1:12:10", "remaining_time": "0:19:41"}
53
- {"current_steps": 450, "total_steps": 560, "loss": 0.0001, "lr": 1.130009458668863e-05, "epoch": 8.0, "percentage": 80.36, "elapsed_time": "1:13:46", "remaining_time": "0:18:01"}
54
- {"current_steps": 450, "total_steps": 560, "eval_loss": 0.11791631579399109, "epoch": 8.0, "percentage": 80.36, "elapsed_time": "1:13:55", "remaining_time": "0:18:04"}
55
- {"current_steps": 460, "total_steps": 560, "loss": 0.0, "lr": 9.403099714207175e-06, "epoch": 8.177777777777777, "percentage": 82.14, "elapsed_time": "1:15:39", "remaining_time": "0:16:26"}
56
- {"current_steps": 470, "total_steps": 560, "loss": 0.0, "lr": 7.663790038585793e-06, "epoch": 8.355555555555556, "percentage": 83.93, "elapsed_time": "1:17:15", "remaining_time": "0:14:47"}
57
- {"current_steps": 480, "total_steps": 560, "loss": 0.0001, "lr": 6.088921331488568e-06, "epoch": 8.533333333333333, "percentage": 85.71, "elapsed_time": "1:18:50", "remaining_time": "0:13:08"}
58
- {"current_steps": 490, "total_steps": 560, "loss": 0.0001, "lr": 4.684610648167503e-06, "epoch": 8.71111111111111, "percentage": 87.5, "elapsed_time": "1:20:27", "remaining_time": "0:11:29"}
59
- {"current_steps": 500, "total_steps": 560, "loss": 0.0001, "lr": 3.4563125677897932e-06, "epoch": 8.88888888888889, "percentage": 89.29, "elapsed_time": "1:22:03", "remaining_time": "0:09:50"}
60
- {"current_steps": 500, "total_steps": 560, "eval_loss": 0.11865098774433136, "epoch": 8.88888888888889, "percentage": 89.29, "elapsed_time": "1:22:14", "remaining_time": "0:09:52"}
61
- {"current_steps": 510, "total_steps": 560, "loss": 0.0001, "lr": 2.408798006933882e-06, "epoch": 9.066666666666666, "percentage": 91.07, "elapsed_time": "1:23:56", "remaining_time": "0:08:13"}
62
- {"current_steps": 520, "total_steps": 560, "loss": 0.0001, "lr": 1.5461356885461075e-06, "epoch": 9.244444444444444, "percentage": 92.86, "elapsed_time": "1:25:32", "remaining_time": "0:06:34"}
63
- {"current_steps": 530, "total_steps": 560, "loss": 0.0, "lr": 8.716763383355864e-07, "epoch": 9.422222222222222, "percentage": 94.64, "elapsed_time": "1:27:08", "remaining_time": "0:04:55"}
64
- {"current_steps": 540, "total_steps": 560, "loss": 0.0001, "lr": 3.8803966999139684e-07, "epoch": 9.6, "percentage": 96.43, "elapsed_time": "1:28:44", "remaining_time": "0:03:17"}
65
- {"current_steps": 550, "total_steps": 560, "loss": 0.0001, "lr": 9.710420977340762e-08, "epoch": 9.777777777777779, "percentage": 98.21, "elapsed_time": "1:30:20", "remaining_time": "0:01:38"}
66
- {"current_steps": 550, "total_steps": 560, "eval_loss": 0.11960210651159286, "epoch": 9.777777777777779, "percentage": 98.21, "elapsed_time": "1:30:30", "remaining_time": "0:01:38"}
67
- {"current_steps": 560, "total_steps": 560, "loss": 0.0001, "lr": 0.0, "epoch": 9.955555555555556, "percentage": 100.0, "elapsed_time": "1:32:13", "remaining_time": "0:00:00"}
68
- {"current_steps": 560, "total_steps": 560, "epoch": 9.955555555555556, "percentage": 100.0, "elapsed_time": "1:32:19", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 280, "loss": 2.3562, "lr": 3.571428571428572e-05, "epoch": 0.35555555555555557, "percentage": 3.57, "elapsed_time": "0:00:37", "remaining_time": "0:16:55"}
2
+ {"current_steps": 20, "total_steps": 280, "loss": 0.6405, "lr": 7.142857142857143e-05, "epoch": 0.7111111111111111, "percentage": 7.14, "elapsed_time": "0:01:14", "remaining_time": "0:16:03"}
3
+ {"current_steps": 30, "total_steps": 280, "loss": 0.1061, "lr": 9.998445910004082e-05, "epoch": 1.0666666666666667, "percentage": 10.71, "elapsed_time": "0:01:51", "remaining_time": "0:15:33"}
4
+ {"current_steps": 40, "total_steps": 280, "loss": 0.0587, "lr": 9.944154131125642e-05, "epoch": 1.4222222222222223, "percentage": 14.29, "elapsed_time": "0:02:28", "remaining_time": "0:14:51"}
5
+ {"current_steps": 50, "total_steps": 280, "loss": 0.0719, "lr": 9.81312123475006e-05, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:03:06", "remaining_time": "0:14:15"}
6
+ {"current_steps": 50, "total_steps": 280, "eval_loss": 0.07298772037029266, "epoch": 1.7777777777777777, "percentage": 17.86, "elapsed_time": "0:03:09", "remaining_time": "0:14:31"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd62b654f623077f50c7f74ec1c9713fa47012417657e5c83a7aa470e3c14a77
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c44d638eae648e387b2d75777d093e6b7f0dd07e2de6dc9ccec7f8bd934b7090
3
  size 5560