fpadovani committed on
Commit
6fbdc53
·
verified ·
1 Parent(s): 5c165bd

Training in progress, step 1500, checkpoint

Browse files
checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9544c5e4c90db360cba5429b9e6d82f61b7d097443d0d5eee1e67cca8ce28ca
3
  size 435544704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e8032fe6e2b361176c658949a7944ff5451ace2677bced8d9d648430bb30c5
3
  size 435544704
checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:077f6e457ca163e65e6965512d6c70c058b4354f40d064f5c7b6c6a8119be0fd
3
  size 871183627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f24a0f2d586b9c45412afdd1903570ccc56c5ebb7c95aaad362b0ea54f6ebf7a
3
  size 871183627
checkpoint-1500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20c27b687f67409fa19b30413659b9d44e1bfcba7c27b49a084aa2c5cf01e814
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5736fc4f79b049d43cebb82bd8d6e381c56b5d731c447ebb08a84d1fa2ff4850
3
  size 14709
checkpoint-1500/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca372268f4fa9335030c0cb7aedb6cdba75f457da50e7a4034abb1a2d0843689
3
+ size 1383
checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9315627fa2833233ff9b197f9fc0f1bf1637a88d74b2d6257547556641609ee4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8259265c70704be41b3a0660d7303797b6c392f95a832ca95ac25d45da51f204
3
  size 1465
checkpoint-1500/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/trainer_state.json CHANGED
@@ -1,41 +1,54 @@
1
  {
2
- "best_global_step": 1479,
3
- "best_metric": 3.420259475708008,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.0425963488843815,
6
  "eval_steps": 500,
7
  "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 3.699713945388794,
15
- "eval_runtime": 66.4234,
16
- "eval_samples_per_second": 505.078,
17
- "eval_steps_per_second": 1.987,
18
- "step": 493
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_loss": 3.4864065647125244,
23
- "eval_runtime": 66.5023,
24
- "eval_samples_per_second": 504.478,
25
- "eval_steps_per_second": 1.985,
26
- "step": 986
27
  },
28
  {
29
- "epoch": 3.0,
30
- "eval_loss": 3.420259475708008,
31
- "eval_runtime": 66.4744,
32
- "eval_samples_per_second": 504.691,
33
- "eval_steps_per_second": 1.986,
34
- "step": 1479
35
  }
36
  ],
37
- "logging_steps": 4000,
38
- "max_steps": 2465,
39
  "num_input_tokens_seen": 0,
40
  "num_train_epochs": 5,
41
  "save_steps": 500,
@@ -51,7 +64,7 @@
51
  "attributes": {}
52
  }
53
  },
54
- "total_flos": 2.5081291505664e+16,
55
  "train_batch_size": 256,
56
  "trial_name": null,
57
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1006,
3
+ "best_metric": 3.3002562522888184,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.982107355864811,
6
  "eval_steps": 500,
7
  "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.0019880715705765406,
14
+ "grad_norm": 30.286874771118164,
15
+ "learning_rate": 0.0,
16
+ "loss": 10.4037,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "grad_norm": 4.5649309158325195,
22
+ "learning_rate": 9.990049751243782e-05,
23
+ "loss": 4.5769,
24
+ "step": 503
25
+ },
26
  {
27
  "epoch": 1.0,
28
+ "eval_loss": 3.519712209701538,
29
+ "eval_runtime": 20.0659,
30
+ "eval_samples_per_second": 1582.434,
31
+ "eval_steps_per_second": 6.229,
32
+ "step": 503
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "grad_norm": 4.33253812789917,
37
+ "learning_rate": 7.487562189054727e-05,
38
+ "loss": 3.322,
39
+ "step": 1006
 
40
  },
41
  {
42
+ "epoch": 2.0,
43
+ "eval_loss": 3.3002562522888184,
44
+ "eval_runtime": 19.9319,
45
+ "eval_samples_per_second": 1593.072,
46
+ "eval_steps_per_second": 6.271,
47
+ "step": 1006
48
  }
49
  ],
50
+ "logging_steps": 500,
51
+ "max_steps": 2515,
52
  "num_input_tokens_seen": 0,
53
  "num_train_epochs": 5,
54
  "save_steps": 500,
 
64
  "attributes": {}
65
  }
66
  },
67
+ "total_flos": 2.5051112275968e+16,
68
  "train_batch_size": 256,
69
  "trial_name": null,
70
  "trial_params": null
checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1285c1203d7b7d0bc2c5cf4438755682a229df180e1a3302dd60ed4cd53422e8
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbadbc9569720773994790c3cab01a01ad3d053aeceb5489a3f461ffef8d7cba
3
  size 5969