mgh6 committed on
Commit
f6cc5b4
·
1 Parent(s): 0449940

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82c59ec2d8158df03dc91fa7bf16e8507d566511b5411f9c67de05e90ccd6e91
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adb2b72f8723f4968194cbd9677da9f0d3b95a3bb2b456223aff39c48f66ce55
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a884ac11bbb6fd1dc22614df6924f8dd43c3773bcea3ac35a711f03ab60f071f
3
- size 268170437
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6317f9659375722e0ffcd3bddd1b4887cc0d0a47a8d9f521c95f0e1777589cb
3
+ size 268176506
last-checkpoint/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4949af6620fa4439fb03a0b241a4168f13ef96a69dbb8f746f58c0945aaef872
3
+ size 14512
last-checkpoint/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206aee04b08be982c9ae1d22485ea9da39a726e50a79ff31b2bb78bdde57bf85
3
+ size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e71f795f060986bf4341bea9d8720ae7bfda0098d9ee25a8bcebb898741611d0
3
- size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d75955e6c714b42e871e5da259951f06c1c7e918ec09a61baa0ed321fb603c
3
+ size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06210408644888834,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
@@ -9,50 +9,50 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
- "learning_rate": 9.998757918271022e-05,
14
- "loss": 1.4897,
15
  "step": 100
16
  },
17
  {
18
- "epoch": 0.02,
19
- "learning_rate": 9.997515836542045e-05,
20
- "loss": 1.4177,
21
  "step": 200
22
  },
23
  {
24
- "epoch": 0.04,
25
- "learning_rate": 9.996273754813067e-05,
26
- "loss": 1.3882,
27
  "step": 300
28
  },
29
  {
30
- "epoch": 0.05,
31
- "learning_rate": 9.99503167308409e-05,
32
- "loss": 1.4079,
33
  "step": 400
34
  },
35
  {
36
- "epoch": 0.06,
37
- "learning_rate": 9.993789591355111e-05,
38
- "loss": 1.3846,
39
  "step": 500
40
  },
41
  {
42
- "epoch": 0.06,
43
- "eval_loss": 1.3667649030685425,
44
- "eval_runtime": 632.9435,
45
- "eval_samples_per_second": 25.53,
46
- "eval_steps_per_second": 0.4,
47
  "step": 500
48
  }
49
  ],
50
  "logging_steps": 100,
51
- "max_steps": 805100,
52
  "num_input_tokens_seen": 0,
53
  "num_train_epochs": 100,
54
  "save_steps": 500,
55
- "total_flos": 2500969057152000.0,
56
  "train_batch_size": 64,
57
  "trial_name": null,
58
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.12419274714356682,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
+ "learning_rate": 9.997516145057129e-05,
14
+ "loss": 1.4447,
15
  "step": 100
16
  },
17
  {
18
+ "epoch": 0.05,
19
+ "learning_rate": 9.995032290114258e-05,
20
+ "loss": 1.3886,
21
  "step": 200
22
  },
23
  {
24
+ "epoch": 0.07,
25
+ "learning_rate": 9.992548435171386e-05,
26
+ "loss": 1.3792,
27
  "step": 300
28
  },
29
  {
30
+ "epoch": 0.1,
31
+ "learning_rate": 9.990064580228516e-05,
32
+ "loss": 1.3473,
33
  "step": 400
34
  },
35
  {
36
+ "epoch": 0.12,
37
+ "learning_rate": 9.987580725285644e-05,
38
+ "loss": 1.3557,
39
  "step": 500
40
  },
41
  {
42
+ "epoch": 0.12,
43
+ "eval_loss": 1.3079262971878052,
44
+ "eval_runtime": 75.0933,
45
+ "eval_samples_per_second": 215.186,
46
+ "eval_steps_per_second": 1.691,
47
  "step": 500
48
  }
49
  ],
50
  "logging_steps": 100,
51
+ "max_steps": 402600,
52
  "num_input_tokens_seen": 0,
53
  "num_train_epochs": 100,
54
  "save_steps": 500,
55
+ "total_flos": 5001938038947840.0,
56
  "train_batch_size": 64,
57
  "trial_name": null,
58
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6d59a9adb744b2a139b8a100e72cc86e9fa7d2ee2e2b625692bb081e86590b4
3
- size 4283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17377382dfbf5abb92d5eacb4e8dfb49159713b7f4459e7175a1e6ac74f6b199
3
+ size 4728