SM committed on
Commit
c3eb26d
·
1 Parent(s): 90db77c

With better accuracy

Browse files
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 52.0337
21
- - Accuracy: 0.1243
22
 
23
  ## Model description
24
 
@@ -38,12 +38,12 @@ More information needed
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 5e-05
41
- - train_batch_size: 2
42
- - eval_batch_size: 2
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
- - num_epochs: 40.0
47
 
48
  ### Training results
49
 
@@ -52,6 +52,6 @@ The following hyperparameters were used during training:
52
  ### Framework versions
53
 
54
  - Transformers 4.37.0.dev0
55
- - Pytorch 2.1.2
56
- - Datasets 2.15.0
57
  - Tokenizers 0.15.0
 
17
 
18
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 3.5093
21
+ - Accuracy: 0.4494
22
 
23
  ## Model description
24
 
 
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 5e-05
41
+ - train_batch_size: 4
42
+ - eval_batch_size: 4
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
+ - num_epochs: 60.0
47
 
48
  ### Training results
49
 
 
52
  ### Framework versions
53
 
54
  - Transformers 4.37.0.dev0
55
+ - Pytorch 2.1.2+cu121
56
+ - Datasets 2.16.0
57
  - Tokenizers 0.15.0
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 40.0,
3
- "eval_accuracy": 0.12425328554360812,
4
- "eval_loss": 52.03367233276367,
5
- "eval_runtime": 4.1042,
6
  "eval_samples": 9,
7
- "eval_samples_per_second": 2.193,
8
- "eval_steps_per_second": 1.218,
9
- "perplexity": 3.962203408827054e+22,
10
- "train_loss": 57.43311643738677,
11
- "train_runtime": 10482.6781,
12
  "train_samples": 138,
13
- "train_samples_per_second": 0.527,
14
- "train_steps_per_second": 0.263
15
  }
 
1
  {
2
+ "epoch": 60.0,
3
+ "eval_accuracy": 0.4494406429890301,
4
+ "eval_loss": 3.5092644691467285,
5
+ "eval_runtime": 0.6069,
6
  "eval_samples": 9,
7
+ "eval_samples_per_second": 14.829,
8
+ "eval_steps_per_second": 4.943,
9
+ "perplexity": 33.423674596634115,
10
+ "train_loss": 0.9941332481020972,
11
+ "train_runtime": 1577.313,
12
  "train_samples": 138,
13
+ "train_samples_per_second": 5.249,
14
+ "train_steps_per_second": 1.331
15
  }
checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfcdd32060421fc062c6972b23088021b78ee341a6ba56ac82f86eaea8a9be39
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12f996d0719a66b10d90e36ef43b31f9eaae1c31749508f6854af308bb085736
3
  size 497774208
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40792add400940242337cb4f1c1ded33fc53932d579e2aafc1ad92e26b9120ad
3
- size 995638202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9adc9e7ab852a33f9fe84c647c87587bada024b6538c02088b10c9c8717806e2
3
+ size 995642298
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2248774053cf007b7093c6e0bb2c3b3dd6eaa25d185fd835bab801482da4e4b0
3
- size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a6632d83b7bb45efc05bef8c034ed3b2854a29a949ed96a7ca5bd50bcb7d902
3
+ size 14244
checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3898258d676f040a88d5e204cd4b72f355d3dc5e6acf2f9d957635fad24937e8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eee3c9505132ed967b6539dd4a6fb45e2bc29520ec4ec39ac5c68d846d45dec5
3
  size 1064
checkpoint-1000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.492753623188406,
5
  "eval_steps": 500,
6
  "global_step": 1000,
7
  "is_hyper_param_search": false,
@@ -9,25 +9,25 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 7.25,
13
- "learning_rate": 4.094202898550725e-05,
14
- "loss": 52.964,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 14.49,
19
- "learning_rate": 3.188405797101449e-05,
20
- "loss": 63.81,
21
  "step": 1000
22
  }
23
  ],
24
  "logging_steps": 500,
25
- "max_steps": 2760,
26
  "num_input_tokens_seen": 0,
27
- "num_train_epochs": 40,
28
  "save_steps": 500,
29
- "total_flos": 1045168128000000.0,
30
- "train_batch_size": 2,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 28.571428571428573,
5
  "eval_steps": 500,
6
  "global_step": 1000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 14.29,
13
+ "learning_rate": 3.809523809523809e-05,
14
+ "loss": 1.8052,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 28.57,
19
+ "learning_rate": 2.6190476190476192e-05,
20
+ "loss": 1.0819,
21
  "step": 1000
22
  }
23
  ],
24
  "logging_steps": 500,
25
+ "max_steps": 2100,
26
  "num_input_tokens_seen": 0,
27
+ "num_train_epochs": 60,
28
  "save_steps": 500,
29
+ "total_flos": 2061071548416000.0,
30
+ "train_batch_size": 4,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664
checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42f5e565cdb79f9110a6d84d8389311e50392871d64a8891dbde0a227a8788dc
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db548188c57c7cc26a03d2f3836dac8ae7b3f171ffc94f210669f0684391440
3
  size 497774208
checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e601a8de001ab43374799bb279945ab8304ecc9cb6457dd39819746e3509e5a
3
- size 995638202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0857cce43798c01f9e56b21f550de159b359b827bbbe6664dc0920bb722a5373
3
+ size 995642298
checkpoint-1500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13fd47b12859b8841c4b8248c9b246be3d9ced25781b423c40d0b3a010fa7653
3
- size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c00755da9ed0bcc7ab4ff6d1881daf48c315a760cde6596f5e5de6ebdb5140f8
3
+ size 14244
checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d8150471eaa0602abf5ca49129f5d5e1a49fbee7998e0a72bf6f710952d97a1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f136c5ddfbd5bfe9857c433dcc2fc706e931bc068d2eabf598b25c109d462906
3
  size 1064
checkpoint-1500/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 21.73913043478261,
5
  "eval_steps": 500,
6
  "global_step": 1500,
7
  "is_hyper_param_search": false,
@@ -9,31 +9,31 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 7.25,
13
- "learning_rate": 4.094202898550725e-05,
14
- "loss": 52.964,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 14.49,
19
- "learning_rate": 3.188405797101449e-05,
20
- "loss": 63.81,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 21.74,
25
- "learning_rate": 2.282608695652174e-05,
26
- "loss": 62.5429,
27
  "step": 1500
28
  }
29
  ],
30
  "logging_steps": 500,
31
- "max_steps": 2760,
32
  "num_input_tokens_seen": 0,
33
- "num_train_epochs": 40,
34
  "save_steps": 500,
35
- "total_flos": 1567752192000000.0,
36
- "train_batch_size": 2,
37
  "trial_name": null,
38
  "trial_params": null
39
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 42.857142857142854,
5
  "eval_steps": 500,
6
  "global_step": 1500,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 14.29,
13
+ "learning_rate": 3.809523809523809e-05,
14
+ "loss": 1.8052,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 28.57,
19
+ "learning_rate": 2.6190476190476192e-05,
20
+ "loss": 1.0819,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 42.86,
25
+ "learning_rate": 1.4285714285714285e-05,
26
+ "loss": 0.6781,
27
  "step": 1500
28
  }
29
  ],
30
  "logging_steps": 500,
31
+ "max_steps": 2100,
32
  "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 60,
34
  "save_steps": 500,
35
+ "total_flos": 3091607322624000.0,
36
+ "train_batch_size": 4,
37
  "trial_name": null,
38
  "trial_params": null
39
  }
checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664
checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcbe070b82059badc3cff1bfc0bcae3f883ada68f07a60fa8da20273ad31d041
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f835d6e202641570e280cfd02c7abc724ffcf647ef3c4919ddf9d0244fefb0
3
  size 497774208
checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52b6e90b1598b433558c8544104af14d2e9899a893662f3665492f6a88cfb7e1
3
- size 995638202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5953577a0a2b353baf20d78a8d3cafd7804195fc51c78ba605dbd587f53247e
3
+ size 995642298
checkpoint-2000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af998d92b14891eae8da6a02f34398e26c284418aafc0720f904f72ebc45e9b
3
- size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f33e1ce978f1c3e97263679165565a2045be7c9c46fe4e4856a58d36de3efa
3
+ size 14244
checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6dd30ada5b40093c7c92eee80875a56bbece06a0cd26cc8b5c5b15dca76defd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9389d2d6b649add7b167e5f96d84163bb4de87fbdbcea2e7d94c8fc162243048
3
  size 1064
checkpoint-2000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 28.985507246376812,
5
  "eval_steps": 500,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
@@ -9,37 +9,37 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 7.25,
13
- "learning_rate": 4.094202898550725e-05,
14
- "loss": 52.964,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 14.49,
19
- "learning_rate": 3.188405797101449e-05,
20
- "loss": 63.81,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 21.74,
25
- "learning_rate": 2.282608695652174e-05,
26
- "loss": 62.5429,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 28.99,
31
- "learning_rate": 1.3768115942028985e-05,
32
- "loss": 57.5548,
33
  "step": 2000
34
  }
35
  ],
36
  "logging_steps": 500,
37
- "max_steps": 2760,
38
  "num_input_tokens_seen": 0,
39
- "num_train_epochs": 40,
40
  "save_steps": 500,
41
- "total_flos": 2090336256000000.0,
42
- "train_batch_size": 2,
43
  "trial_name": null,
44
  "trial_params": null
45
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 57.142857142857146,
5
  "eval_steps": 500,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 14.29,
13
+ "learning_rate": 3.809523809523809e-05,
14
+ "loss": 1.8052,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 28.57,
19
+ "learning_rate": 2.6190476190476192e-05,
20
+ "loss": 1.0819,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 42.86,
25
+ "learning_rate": 1.4285714285714285e-05,
26
+ "loss": 0.6781,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 57.14,
31
+ "learning_rate": 2.3809523809523808e-06,
32
+ "loss": 0.5142,
33
  "step": 2000
34
  }
35
  ],
36
  "logging_steps": 500,
37
+ "max_steps": 2100,
38
  "num_input_tokens_seen": 0,
39
+ "num_train_epochs": 60,
40
  "save_steps": 500,
41
+ "total_flos": 4121097928704000.0,
42
+ "train_batch_size": 4,
43
  "trial_name": null,
44
  "trial_params": null
45
  }
checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664
checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a838afbfe8df7d2ae25ab6ef968e9623a0ef1b80479cacc84732d3688e94ca49
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4dcf13f972feb06a7bc56ddc93ab0a631fe7d88e27643cbf9c0e042041fcff
3
  size 497774208
checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9030cdfbb9f6a84c732f3444b8c651a2122dc91f07c08553a04a4a59d4d5e919
3
- size 995638202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d55291718bd821470b8b79351ab3e47b578d3bc202542399220a633dc12848
3
+ size 995642298
checkpoint-500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fed77b14fe062f6db72d68cedd6fd95bae3305b7a735eef3c85da43fd15d476
3
- size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e651e34957d21ef1419934c9c311ba824f956fd612f9f4b5bea9e4854d09d528
3
+ size 14244
checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1835112c9c4e116fab49de7670619145c7152adb2a54074e2003a8ced014d3ac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d84cb0a17b808448928ddd21fdfb54eabfda0598dfe3f0b7eebb6d442d67f65
3
  size 1064
checkpoint-500/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.246376811594203,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
@@ -9,19 +9,19 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 7.25,
13
- "learning_rate": 4.094202898550725e-05,
14
- "loss": 52.964,
15
  "step": 500
16
  }
17
  ],
18
  "logging_steps": 500,
19
- "max_steps": 2760,
20
  "num_input_tokens_seen": 0,
21
- "num_train_epochs": 40,
22
  "save_steps": 500,
23
- "total_flos": 522584064000000.0,
24
- "train_batch_size": 2,
25
  "trial_name": null,
26
  "trial_params": null
27
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.285714285714286,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 14.29,
13
+ "learning_rate": 3.809523809523809e-05,
14
+ "loss": 1.8052,
15
  "step": 500
16
  }
17
  ],
18
  "logging_steps": 500,
19
+ "max_steps": 2100,
20
  "num_input_tokens_seen": 0,
21
+ "num_train_epochs": 60,
22
  "save_steps": 500,
23
+ "total_flos": 1030535774208000.0,
24
+ "train_batch_size": 4,
25
  "trial_name": null,
26
  "trial_params": null
27
  }
checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 40.0,
3
- "eval_accuracy": 0.12425328554360812,
4
- "eval_loss": 52.03367233276367,
5
- "eval_runtime": 4.1042,
6
  "eval_samples": 9,
7
- "eval_samples_per_second": 2.193,
8
- "eval_steps_per_second": 1.218,
9
- "perplexity": 3.962203408827054e+22
10
  }
 
1
  {
2
+ "epoch": 60.0,
3
+ "eval_accuracy": 0.4494406429890301,
4
+ "eval_loss": 3.5092644691467285,
5
+ "eval_runtime": 0.6069,
6
  "eval_samples": 9,
7
+ "eval_samples_per_second": 14.829,
8
+ "eval_steps_per_second": 4.943,
9
+ "perplexity": 33.423674596634115
10
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:972cf3d0d3d030aae15410a32d454084f803350380f05215f9f7a4c30ffc505a
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aaa8c132a2f82e4f3a057cd8336ab86a6452f68defee110eba5fbb71b1e662c
3
  size 497774208
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 40.0,
3
- "train_loss": 57.43311643738677,
4
- "train_runtime": 10482.6781,
5
  "train_samples": 138,
6
- "train_samples_per_second": 0.527,
7
- "train_steps_per_second": 0.263
8
  }
 
1
  {
2
+ "epoch": 60.0,
3
+ "train_loss": 0.9941332481020972,
4
+ "train_runtime": 1577.313,
5
  "train_samples": 138,
6
+ "train_samples_per_second": 5.249,
7
+ "train_steps_per_second": 1.331
8
  }
trainer_state.json CHANGED
@@ -1,60 +1,54 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 40.0,
5
  "eval_steps": 500,
6
- "global_step": 2760,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 7.25,
13
- "learning_rate": 4.094202898550725e-05,
14
- "loss": 52.964,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 14.49,
19
- "learning_rate": 3.188405797101449e-05,
20
- "loss": 63.81,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 21.74,
25
- "learning_rate": 2.282608695652174e-05,
26
- "loss": 62.5429,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 28.99,
31
- "learning_rate": 1.3768115942028985e-05,
32
- "loss": 57.5548,
33
  "step": 2000
34
  },
35
  {
36
- "epoch": 36.23,
37
- "learning_rate": 4.710144927536232e-06,
38
- "loss": 53.2908,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 40.0,
43
- "step": 2760,
44
- "total_flos": 2884664033280000.0,
45
- "train_loss": 57.43311643738677,
46
- "train_runtime": 10482.6781,
47
- "train_samples_per_second": 0.527,
48
- "train_steps_per_second": 0.263
49
  }
50
  ],
51
  "logging_steps": 500,
52
- "max_steps": 2760,
53
  "num_input_tokens_seen": 0,
54
- "num_train_epochs": 40,
55
  "save_steps": 500,
56
- "total_flos": 2884664033280000.0,
57
- "train_batch_size": 2,
58
  "trial_name": null,
59
  "trial_params": null
60
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 60.0,
5
  "eval_steps": 500,
6
+ "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 14.29,
13
+ "learning_rate": 3.809523809523809e-05,
14
+ "loss": 1.8052,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 28.57,
19
+ "learning_rate": 2.6190476190476192e-05,
20
+ "loss": 1.0819,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 42.86,
25
+ "learning_rate": 1.4285714285714285e-05,
26
+ "loss": 0.6781,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 57.14,
31
+ "learning_rate": 2.3809523809523808e-06,
32
+ "loss": 0.5142,
33
  "step": 2000
34
  },
35
  {
36
+ "epoch": 60.0,
37
+ "step": 2100,
38
+ "total_flos": 4326996049920000.0,
39
+ "train_loss": 0.9941332481020972,
40
+ "train_runtime": 1577.313,
41
+ "train_samples_per_second": 5.249,
42
+ "train_steps_per_second": 1.331
 
 
 
 
 
 
43
  }
44
  ],
45
  "logging_steps": 500,
46
+ "max_steps": 2100,
47
  "num_input_tokens_seen": 0,
48
+ "num_train_epochs": 60,
49
  "save_steps": 500,
50
+ "total_flos": 4326996049920000.0,
51
+ "train_batch_size": 4,
52
  "trial_name": null,
53
  "trial_params": null
54
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664