fpadovani commited on
Commit
ed0bd03
·
verified ·
1 Parent(s): bfc808d

Training in progress, step 500, checkpoint

Browse files
checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a82666204de8b89bd0036f6260389ddc7c2fbf6bc8307febc536ce90920f9ad1
3
  size 435544704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e79df87d9c759bd2761e451a5e321f108e1ad64bdcc914571daecc9f5d2dbe
3
  size 435544704
checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:110f3051b3640d1a1fc6615fd702246f8d8b3fdc45b595ac33e7a223d9bd664b
3
  size 871183627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:975de4f1994dc71281a49407729fdb3f499bcdd19ac0baf85690d59a68bebdea
3
  size 871183627
checkpoint-500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b092abfcd4cf2a442863110ef72a1ea6d44da9e7f7b987c0619b0f03d4c428
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:420235bf06177d9f84f39f30fbfcee8722feaeba193f6efc76bcd8a2574da100
3
  size 14709
checkpoint-500/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77569c2e850b04af982cc8c1389f1430851448915c593b69e5da36ce05b71d7
3
+ size 1383
checkpoint-500/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-500/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_global_step": 491,
3
- "best_metric": 4.097745895385742,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0183299389002036,
6
  "eval_steps": 500,
7
  "global_step": 500,
8
  "is_hyper_param_search": false,
@@ -10,16 +10,15 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 1.0,
14
- "eval_loss": 4.097745895385742,
15
- "eval_runtime": 66.4624,
16
- "eval_samples_per_second": 504.782,
17
- "eval_steps_per_second": 1.986,
18
- "step": 491
19
  }
20
  ],
21
- "logging_steps": 4000,
22
- "max_steps": 2455,
23
  "num_input_tokens_seen": 0,
24
  "num_train_epochs": 5,
25
  "save_steps": 500,
@@ -35,7 +34,7 @@
35
  "attributes": {}
36
  }
37
  },
38
- "total_flos": 8358601457664000.0,
39
  "train_batch_size": 256,
40
  "trial_name": null,
41
  "trial_params": null
 
1
  {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.998003992015968,
6
  "eval_steps": 500,
7
  "global_step": 500,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.001996007984031936,
14
+ "grad_norm": 30.08072853088379,
15
+ "learning_rate": 0.0,
16
+ "loss": 10.3965,
17
+ "step": 1
 
18
  }
19
  ],
20
+ "logging_steps": 500,
21
+ "max_steps": 2505,
22
  "num_input_tokens_seen": 0,
23
  "num_train_epochs": 5,
24
  "save_steps": 500,
 
34
  "attributes": {}
35
  }
36
  },
37
+ "total_flos": 8361345024000000.0,
38
  "train_batch_size": 256,
39
  "trial_name": null,
40
  "trial_params": null
checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a57dba9b9c8c3226400ebcad0a2060a021a094dd027a7ac1acf9378f5c6cc27
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7a8afa9c6745812d26be318b1c7d9348962a9c4c0d9cbb9442934f28d74eb6
3
  size 5969