fpadovani commited on
Commit
b2b82b2
·
verified ·
1 Parent(s): e437f99

Training in progress, step 2000, checkpoint

Browse files
checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7ef501245cab36951ffbc368622440ccb8abff1a5d4cae896b51162b3d3474f
3
  size 435544704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92bf2ce48e8f6b47c2bdd084954a9a31723d1dd2eaffb94f92e2a19f6a6919c6
3
  size 435544704
checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38e8c7d41ae296f1baf229587ce4accf7da5f3cf2437c9a2e7934230552369fe
3
  size 871183627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7809189c297b6fcd1364a9948878a7a909dbed481baaec7485bcec1cb29f1bf
3
  size 871183627
checkpoint-2000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84ed9aea9531213b3bd8e1b6c76a640fc631139a5416eed6660fc9cc59215653
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8c48da2a25aa191b4ec410b8050371253a3b1cc35199ae599d4c1c2c01dc3d2
3
  size 14709
checkpoint-2000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4aa03f6e0cd07cf67ce1fbe3101d545f5771ef9148b9debf02b11cf6948da5c
3
+ size 1383
checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f70fe7298867fed663d1ff8e535a73bf05f135252ce7b79eaa00f66ec9b92df
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b00ffe724ca6b73e99f53f62318aca0890f3ad003cefa141b877072b3ea38f
3
  size 1465
checkpoint-2000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/trainer_state.json CHANGED
@@ -1,49 +1,69 @@
1
  {
2
- "best_global_step": 1964,
3
- "best_metric": 3.5689728260040283,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.0733197556008145,
6
  "eval_steps": 500,
7
  "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 4.097745895385742,
15
- "eval_runtime": 66.4624,
16
- "eval_samples_per_second": 504.782,
17
- "eval_steps_per_second": 1.986,
18
- "step": 491
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_loss": 3.760134696960449,
23
- "eval_runtime": 66.4623,
24
- "eval_samples_per_second": 504.783,
25
- "eval_steps_per_second": 1.986,
26
- "step": 982
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 3.0,
30
- "eval_loss": 3.632955312728882,
31
- "eval_runtime": 66.4406,
32
- "eval_samples_per_second": 504.947,
33
- "eval_steps_per_second": 1.987,
34
- "step": 1473
35
  },
36
  {
37
- "epoch": 4.0,
38
- "eval_loss": 3.5689728260040283,
39
- "eval_runtime": 66.4551,
40
- "eval_samples_per_second": 504.837,
41
- "eval_steps_per_second": 1.986,
42
- "step": 1964
43
  }
44
  ],
45
- "logging_steps": 4000,
46
- "max_steps": 2455,
47
  "num_input_tokens_seen": 0,
48
  "num_train_epochs": 5,
49
  "save_steps": 500,
@@ -59,7 +79,7 @@
59
  "attributes": {}
60
  }
61
  },
62
- "total_flos": 3.3434405830656e+16,
63
  "train_batch_size": 256,
64
  "trial_name": null,
65
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1503,
3
+ "best_metric": 3.3919992446899414,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.992015968063872,
6
  "eval_steps": 500,
7
  "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.001996007984031936,
14
+ "grad_norm": 30.08072853088379,
15
+ "learning_rate": 0.0,
16
+ "loss": 10.3965,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "grad_norm": 1.243941068649292,
22
+ "learning_rate": 0.0001,
23
+ "loss": 4.8834,
24
+ "step": 501
25
+ },
26
  {
27
  "epoch": 1.0,
28
+ "eval_loss": 3.908651113510132,
29
+ "eval_runtime": 19.9819,
30
+ "eval_samples_per_second": 1586.036,
31
+ "eval_steps_per_second": 6.206,
32
+ "step": 501
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "grad_norm": 0.9641751050949097,
37
+ "learning_rate": 7.495e-05,
38
+ "loss": 3.6523,
39
+ "step": 1002
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_loss": 3.5351648330688477,
44
+ "eval_runtime": 19.9653,
45
+ "eval_samples_per_second": 1587.356,
46
+ "eval_steps_per_second": 6.211,
47
+ "step": 1002
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 0.8552572131156921,
52
+ "learning_rate": 4.99e-05,
53
+ "loss": 3.3936,
54
+ "step": 1503
 
55
  },
56
  {
57
+ "epoch": 3.0,
58
+ "eval_loss": 3.3919992446899414,
59
+ "eval_runtime": 32.8666,
60
+ "eval_samples_per_second": 964.261,
61
+ "eval_steps_per_second": 3.773,
62
+ "step": 1503
63
  }
64
  ],
65
+ "logging_steps": 500,
66
+ "max_steps": 2505,
67
  "num_input_tokens_seen": 0,
68
  "num_train_epochs": 5,
69
  "save_steps": 500,
 
79
  "attributes": {}
80
  }
81
  },
82
+ "total_flos": 3.3440284901376e+16,
83
  "train_batch_size": 256,
84
  "trial_name": null,
85
  "trial_params": null
checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a57dba9b9c8c3226400ebcad0a2060a021a094dd027a7ac1acf9378f5c6cc27
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7a8afa9c6745812d26be318b1c7d9348962a9c4c0d9cbb9442934f28d74eb6
3
  size 5969