FredericFan commited on
Commit
3c329de
·
verified ·
1 Parent(s): 1729b59

Training in progress, step 13500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89d970941693f2a588e9760f579c7f92a3993862857386f4ce7a42732003bed9
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38f7ad03559d709a4e2e50b069ded790243b605bd7f371ff573649b04f3b9ec1
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa728c323613f14720350a34b8de9e9bb0f00feb6895e9763f98b41fc90ba66a
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92b31881fd08d1c5e92f4d85e7ad4bea7caf87612bcfd05226d0426ef46f64b4
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f6fa31d853fe83023de7f7f07d4ad55cd60c82617211a8926ae6bb50464d9fc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8e3657fd3e577ff4e755452808dc3c4520d43cd58c493adfa9663f21feb734
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a8f1b9bc1d96d7439df35e8166ab30771f48f3a8a26970884d1d49063118f39
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc96f5c8ec054c4dc9f1608ed0c88e89518d2f17c416476f4efe7bf3b829bb03
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0824647843837738,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
- "epoch": 1.04,
5
  "eval_steps": 500,
6
- "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2035,6 +2035,84 @@
2035
  "eval_samples_per_second": 22.724,
2036
  "eval_steps_per_second": 5.681,
2037
  "step": 13000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2038
  }
2039
  ],
2040
  "logging_steps": 50,
@@ -2054,7 +2132,7 @@
2054
  "attributes": {}
2055
  }
2056
  },
2057
- "total_flos": 3.166581030912e+16,
2058
  "train_batch_size": 4,
2059
  "trial_name": null,
2060
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0824647843837738,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
+ "epoch": 1.08,
5
  "eval_steps": 500,
6
+ "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2035
  "eval_samples_per_second": 22.724,
2036
  "eval_steps_per_second": 5.681,
2037
  "step": 13000
2038
+ },
2039
+ {
2040
+ "epoch": 1.044,
2041
+ "grad_norm": 0.09841930866241455,
2042
+ "learning_rate": 1.43424e-05,
2043
+ "loss": 0.0534,
2044
+ "step": 13050
2045
+ },
2046
+ {
2047
+ "epoch": 1.048,
2048
+ "grad_norm": 0.1224198266863823,
2049
+ "learning_rate": 1.42824e-05,
2050
+ "loss": 0.0564,
2051
+ "step": 13100
2052
+ },
2053
+ {
2054
+ "epoch": 1.052,
2055
+ "grad_norm": 0.17220191657543182,
2056
+ "learning_rate": 1.42224e-05,
2057
+ "loss": 0.0567,
2058
+ "step": 13150
2059
+ },
2060
+ {
2061
+ "epoch": 1.056,
2062
+ "grad_norm": 0.08674409985542297,
2063
+ "learning_rate": 1.41636e-05,
2064
+ "loss": 0.0508,
2065
+ "step": 13200
2066
+ },
2067
+ {
2068
+ "epoch": 1.06,
2069
+ "grad_norm": 0.12293367087841034,
2070
+ "learning_rate": 1.41036e-05,
2071
+ "loss": 0.0541,
2072
+ "step": 13250
2073
+ },
2074
+ {
2075
+ "epoch": 1.064,
2076
+ "grad_norm": 0.12341846525669098,
2077
+ "learning_rate": 1.40436e-05,
2078
+ "loss": 0.055,
2079
+ "step": 13300
2080
+ },
2081
+ {
2082
+ "epoch": 1.068,
2083
+ "grad_norm": 0.10165009647607803,
2084
+ "learning_rate": 1.39836e-05,
2085
+ "loss": 0.0589,
2086
+ "step": 13350
2087
+ },
2088
+ {
2089
+ "epoch": 1.072,
2090
+ "grad_norm": 0.18138067424297333,
2091
+ "learning_rate": 1.39236e-05,
2092
+ "loss": 0.053,
2093
+ "step": 13400
2094
+ },
2095
+ {
2096
+ "epoch": 1.076,
2097
+ "grad_norm": 0.18615098297595978,
2098
+ "learning_rate": 1.3863599999999999e-05,
2099
+ "loss": 0.0558,
2100
+ "step": 13450
2101
+ },
2102
+ {
2103
+ "epoch": 1.08,
2104
+ "grad_norm": 0.06511889398097992,
2105
+ "learning_rate": 1.38036e-05,
2106
+ "loss": 0.062,
2107
+ "step": 13500
2108
+ },
2109
+ {
2110
+ "epoch": 1.08,
2111
+ "eval_loss": 0.08285626024007797,
2112
+ "eval_runtime": 88.0463,
2113
+ "eval_samples_per_second": 22.715,
2114
+ "eval_steps_per_second": 5.679,
2115
+ "step": 13500
2116
  }
2117
  ],
2118
  "logging_steps": 50,
 
2132
  "attributes": {}
2133
  }
2134
  },
2135
+ "total_flos": 3.288372609024e+16,
2136
  "train_batch_size": 4,
2137
  "trial_name": null,
2138
  "trial_params": null