Jasim9085 committed on
Commit
e3e3e10
·
verified ·
1 Parent(s): 4222a34

Training in progress, step 1528, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6fb4957fada6ecab50fa08c8a31d022817b69beb5fe802008a63553b4e3af87
3
  size 557912620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6b9d2acbd0f168009f26b7ef13d571b89b41526bebf47dfd26aa64eccffacf7
3
  size 557912620
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01bf0a653f421c33e3ba0755c2101d46686890bbdb05dfa9238d9bd8d0bdd825
3
  size 1115579834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b934f68933cf5a165e3dc8c0b8ed15087c86623719bd958ef589336c02b179
3
  size 1115579834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e25a375aea0986e1ff49c229feb71c134d7d2c78e3f6bc24f04319b58e4105c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf4cbadc64285734eaa93ad7c58796388c4125fa5554ad0cc36602591473bf9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f7c4028863b38382c81b1e563b248dc15055fba98c7622e800df339f6218bc1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f523988ba2de8054dc46a890358ab72870b8513bf3f43057aee303c531f01bf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6544502617801047,
6
  "eval_steps": 500,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -36,6 +36,20 @@
36
  "learning_rate": 1.731020942408377e-05,
37
  "loss": 0.0004,
38
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
  ],
41
  "logging_steps": 250,
@@ -50,12 +64,12 @@
50
  "should_evaluate": false,
51
  "should_log": false,
52
  "should_save": true,
53
- "should_training_stop": false
54
  },
55
  "attributes": {}
56
  }
57
  },
58
- "total_flos": 4877891665920000.0,
59
  "train_batch_size": 16,
60
  "trial_name": null,
61
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 1528,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
36
  "learning_rate": 1.731020942408377e-05,
37
  "loss": 0.0004,
38
  "step": 1000
39
+ },
40
+ {
41
+ "epoch": 0.8180628272251309,
42
+ "grad_norm": 0.03750693053007126,
43
+ "learning_rate": 9.129581151832461e-06,
44
+ "loss": 0.0003,
45
+ "step": 1250
46
+ },
47
+ {
48
+ "epoch": 0.981675392670157,
49
+ "grad_norm": 0.022292787209153175,
50
+ "learning_rate": 9.489528795811518e-07,
51
+ "loss": 0.0004,
52
+ "step": 1500
53
  }
54
  ],
55
  "logging_steps": 250,
 
64
  "should_evaluate": false,
65
  "should_log": false,
66
  "should_save": true,
67
+ "should_training_stop": true
68
  },
69
  "attributes": {}
70
  }
71
  },
72
+ "total_flos": 7452961163182080.0,
73
  "train_batch_size": 16,
74
  "trial_name": null,
75
  "trial_params": null