Yuvrajg2107 commited on
Commit
0e82021
·
verified ·
1 Parent(s): 051cec6

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b171e11cf1f06b390942cd5db703ed9b47273c7612b41e4993a30dbad949cdd
3
  size 1340623936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:136fa05d4822b585e93ffebb7f6e290435b41a950ca4ec0870515b9da5c83243
3
  size 1340623936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13798f6e5591b7f07db647f3489a166d6156b5472962733618f77a107016972a
3
  size 2681480830
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a6833a7fe2adbcb1959181cf2a161bed8da2ab70461adce0419862c0a89a53
3
  size 2681480830
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f8256de738109b958aaf2e3557bae2f85d4052ebbc24cf2866e264ba390a20e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a99c6b534c3c4da04d2699dfbe806811bb55edcbda8669fe77a5c48de7405792
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14ae2a2128444abab378aa06c09a61a84665f758fcc19fc46f5789b0bc1b5665
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4aa03f6e0cd07cf67ce1fbe3101d545f5771ef9148b9debf02b11cf6948da5c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2ce5645288f483e7a9daa2c7a16180a6531e36418232835ffee5a4fecb21062
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a85adeb1e28a9f8d2b9e5eda88bb40a25fff60709bb465ee86ac35b1f82adc
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 1000,
3
- "best_metric": 0.16814623773097992,
4
- "best_model_checkpoint": "./training_output/checkpoint-1000",
5
- "epoch": 0.125,
6
  "eval_steps": 1000,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -31,6 +31,29 @@
31
  "eval_samples_per_second": 15.374,
32
  "eval_steps_per_second": 0.961,
33
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  ],
36
  "logging_steps": 500,
@@ -50,7 +73,7 @@
50
  "attributes": {}
51
  }
52
  },
53
- "total_flos": 3727725453312000.0,
54
  "train_batch_size": 2,
55
  "trial_name": null,
56
  "trial_params": null
 
1
  {
2
+ "best_global_step": 2000,
3
+ "best_metric": 0.07912886142730713,
4
+ "best_model_checkpoint": "./training_output/checkpoint-2000",
5
+ "epoch": 0.25,
6
  "eval_steps": 1000,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
31
  "eval_samples_per_second": 15.374,
32
  "eval_steps_per_second": 0.961,
33
  "step": 1000
34
+ },
35
+ {
36
+ "epoch": 0.1875,
37
+ "grad_norm": 0.07351929694414139,
38
+ "learning_rate": 1.62525e-05,
39
+ "loss": 0.0782,
40
+ "step": 1500
41
+ },
42
+ {
43
+ "epoch": 0.25,
44
+ "grad_norm": 0.13799075782299042,
45
+ "learning_rate": 1.5002500000000002e-05,
46
+ "loss": 0.0618,
47
+ "step": 2000
48
+ },
49
+ {
50
+ "epoch": 0.25,
51
+ "eval_accuracy": 0.923125,
52
+ "eval_loss": 0.07912886142730713,
53
+ "eval_runtime": 529.7101,
54
+ "eval_samples_per_second": 15.103,
55
+ "eval_steps_per_second": 0.944,
56
+ "step": 2000
57
  }
58
  ],
59
  "logging_steps": 500,
 
73
  "attributes": {}
74
  }
75
  },
76
+ "total_flos": 7455450906624000.0,
77
  "train_batch_size": 2,
78
  "trial_name": null,
79
  "trial_params": null