SystemAdmin123 committed on
Commit
6a52f1b
·
verified ·
1 Parent(s): ca1fde5

Training in progress, step 2300, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c8c6ff74448961f63c682755ee44b066c13961022d73c35618f424b0b3b04f2
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4480cf94e90fbcff6ded4ea0e033092e231273c07472d355d57709c17971b8c0
3
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19ad29e2afc3f664caef80f5f659fd2e3564bb5cc89ac52c1e921a1fa793063f
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cab56d644789b3d0d8a275c1d8bb39ee8a44ddb72a82700ae804ea2df595a0c
3
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de778ad033fb57d0c77011ce2efc5adbb4d2d05637cac1cc29a51ee29f3a143a
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2576b8c41b55fcd088c51000da64fb5acf75a9919b8c7663a460d9d3353867f
3
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70e0ab1944abe87b05cd898915e49be1d42ad8b7bcf4d4fd71d39ce17c168f52
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9c70312c9c125fe0ca49b749fdf3d0f1672e3e046c39375a7f1e758e7ec2176
3
  size 1168138808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14109e8a50e6b2db2412f32737fe9c023d674ef95a0a8795aecdbf2daa7b9b3f
3
  size 16311821444
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:524e6c358512b5845edfe3223d57c6d965a4abb689d2e5543ab49bf57b265729
3
  size 16311821444
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3134a0ee77592caf611ce2f68e680b25ae8deac50b353e39ae4a57c957fb3acf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79370aa8e64e10f5426d7aadbbf1bb1c37647728868a165ccce129881ecf31d5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.015873015873016,
5
  "eval_steps": 40,
6
- "global_step": 2280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2067,6 +2067,20 @@
2067
  "eval_samples_per_second": 51.543,
2068
  "eval_steps_per_second": 6.459,
2069
  "step": 2280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2070
  }
2071
  ],
2072
  "logging_steps": 10,
@@ -2081,12 +2095,12 @@
2081
  "should_evaluate": false,
2082
  "should_log": false,
2083
  "should_save": true,
2084
- "should_training_stop": false
2085
  },
2086
  "attributes": {}
2087
  }
2088
  },
2089
- "total_flos": 1.6821022433036206e+18,
2090
  "train_batch_size": 2,
2091
  "trial_name": null,
2092
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.068783068783069,
5
  "eval_steps": 40,
6
+ "global_step": 2300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2067
  "eval_samples_per_second": 51.543,
2068
  "eval_steps_per_second": 6.459,
2069
  "step": 2280
2070
+ },
2071
+ {
2072
+ "epoch": 6.042328042328043,
2073
+ "grad_norm": 0.1318359375,
2074
+ "learning_rate": 1.0336163855129144e-08,
2075
+ "loss": 0.0056,
2076
+ "step": 2290
2077
+ },
2078
+ {
2079
+ "epoch": 6.068783068783069,
2080
+ "grad_norm": 0.10400390625,
2081
+ "learning_rate": 0.0,
2082
+ "loss": 0.0075,
2083
+ "step": 2300
2084
  }
2085
  ],
2086
  "logging_steps": 10,
 
2095
  "should_evaluate": false,
2096
  "should_log": false,
2097
  "should_save": true,
2098
+ "should_training_stop": true
2099
  },
2100
  "attributes": {}
2101
  }
2102
  },
2103
+ "total_flos": 1.6968575261396173e+18,
2104
  "train_batch_size": 2,
2105
  "trial_name": null,
2106
  "trial_params": null