irishprancer commited on
Commit
1d19525
·
verified ·
1 Parent(s): 0e4d5b0

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:023bdf4fb37105c005af155a89b3617ec253742740b1c443640c3cfd919b153c
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95dc31204144af8854a2bd166b700ffe00c60e93c0807719622f465a8d64b4a6
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee34169c145fc9db54236abebff2e7bbd35b77780ea424fc21c3c1a2442d1d2d
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5eb56f2c77d18a4cabe29b95f8b8d20c91a4f69adfa24c425612b6f1851b59
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0245389a7bc82d29e971a63bfd36a481643b7f85d29704591ececfc67d58955b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d81f454917d58ee0468d722bb9dc06ecc2918f8b10aaeacf5f5f0694536198a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1bce2a8653bdd97c984ac0cb8f8fc01c58c1e2c98ab50a771eb0dc001a601b0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e101b8bc114bf6ab419ec6879047895e309c841d9a023f2c8c16366e9111d1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 91.30434782608695,
5
  "eval_steps": 150,
6
- "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2037,6 +2037,151 @@
2037
  "EMA_steps_per_second": 25.058,
2038
  "epoch": 91.30434782608695,
2039
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2040
  }
2041
  ],
2042
  "logging_steps": 10,
@@ -2056,7 +2201,7 @@
2056
  "attributes": {}
2057
  }
2058
  },
2059
- "total_flos": 5.411670961736909e+16,
2060
  "train_batch_size": 4,
2061
  "trial_name": null,
2062
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 97.82608695652173,
5
  "eval_steps": 150,
6
+ "global_step": 2250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2037
  "EMA_steps_per_second": 25.058,
2038
  "epoch": 91.30434782608695,
2039
  "step": 2100
2040
+ },
2041
+ {
2042
+ "epoch": 91.73913043478261,
2043
+ "grad_norm": 2.307114601135254,
2044
+ "learning_rate": 7.487848504295937e-06,
2045
+ "loss": 0.2596,
2046
+ "step": 2110
2047
+ },
2048
+ {
2049
+ "epoch": 92.17391304347827,
2050
+ "grad_norm": 2.0132083892822266,
2051
+ "learning_rate": 7.4878388584705885e-06,
2052
+ "loss": 0.2902,
2053
+ "step": 2120
2054
+ },
2055
+ {
2056
+ "epoch": 92.6086956521739,
2057
+ "grad_norm": 1.9218742847442627,
2058
+ "learning_rate": 7.487827728682402e-06,
2059
+ "loss": 0.2791,
2060
+ "step": 2130
2061
+ },
2062
+ {
2063
+ "epoch": 93.04347826086956,
2064
+ "grad_norm": 1.6972328424453735,
2065
+ "learning_rate": 7.487815114935791e-06,
2066
+ "loss": 0.2376,
2067
+ "step": 2140
2068
+ },
2069
+ {
2070
+ "epoch": 93.47826086956522,
2071
+ "grad_norm": 1.8078455924987793,
2072
+ "learning_rate": 7.487801017235753e-06,
2073
+ "loss": 0.289,
2074
+ "step": 2150
2075
+ },
2076
+ {
2077
+ "epoch": 93.91304347826087,
2078
+ "grad_norm": 2.128847599029541,
2079
+ "learning_rate": 7.4877854355878785e-06,
2080
+ "loss": 0.27,
2081
+ "step": 2160
2082
+ },
2083
+ {
2084
+ "epoch": 94.34782608695652,
2085
+ "grad_norm": 1.9462212324142456,
2086
+ "learning_rate": 7.487768369998342e-06,
2087
+ "loss": 0.2166,
2088
+ "step": 2170
2089
+ },
2090
+ {
2091
+ "epoch": 94.78260869565217,
2092
+ "grad_norm": 2.225867986679077,
2093
+ "learning_rate": 7.4877498204739075e-06,
2094
+ "loss": 0.2959,
2095
+ "step": 2180
2096
+ },
2097
+ {
2098
+ "epoch": 95.21739130434783,
2099
+ "grad_norm": 2.1711599826812744,
2100
+ "learning_rate": 7.487729787021927e-06,
2101
+ "loss": 0.2598,
2102
+ "step": 2190
2103
+ },
2104
+ {
2105
+ "epoch": 95.65217391304348,
2106
+ "grad_norm": 2.3892881870269775,
2107
+ "learning_rate": 7.487708269650342e-06,
2108
+ "loss": 0.2585,
2109
+ "step": 2200
2110
+ },
2111
+ {
2112
+ "epoch": 96.08695652173913,
2113
+ "grad_norm": 2.3716413974761963,
2114
+ "learning_rate": 7.487685268367682e-06,
2115
+ "loss": 0.2593,
2116
+ "step": 2210
2117
+ },
2118
+ {
2119
+ "epoch": 96.52173913043478,
2120
+ "grad_norm": 1.8392366170883179,
2121
+ "learning_rate": 7.487660783183063e-06,
2122
+ "loss": 0.2681,
2123
+ "step": 2220
2124
+ },
2125
+ {
2126
+ "epoch": 96.95652173913044,
2127
+ "grad_norm": 2.1921820640563965,
2128
+ "learning_rate": 7.48763481410619e-06,
2129
+ "loss": 0.2609,
2130
+ "step": 2230
2131
+ },
2132
+ {
2133
+ "epoch": 97.3913043478261,
2134
+ "grad_norm": 1.5945699214935303,
2135
+ "learning_rate": 7.487607361147356e-06,
2136
+ "loss": 0.2883,
2137
+ "step": 2240
2138
+ },
2139
+ {
2140
+ "epoch": 97.82608695652173,
2141
+ "grad_norm": 1.3208949565887451,
2142
+ "learning_rate": 7.487578424317443e-06,
2143
+ "loss": 0.2525,
2144
+ "step": 2250
2145
+ },
2146
+ {
2147
+ "epoch": 97.82608695652173,
2148
+ "eval_loss": 0.9061517715454102,
2149
+ "eval_runtime": 0.4781,
2150
+ "eval_samples_per_second": 20.916,
2151
+ "eval_steps_per_second": 20.916,
2152
+ "step": 2250
2153
+ },
2154
+ {
2155
+ "Start_State_loss": 0.861186683177948,
2156
+ "Start_State_runtime": 0.4046,
2157
+ "Start_State_samples_per_second": 24.715,
2158
+ "Start_State_steps_per_second": 24.715,
2159
+ "epoch": 97.82608695652173,
2160
+ "step": 2250
2161
+ },
2162
+ {
2163
+ "Raw_Model_loss": 0.9061517715454102,
2164
+ "Raw_Model_runtime": 0.4112,
2165
+ "Raw_Model_samples_per_second": 24.319,
2166
+ "Raw_Model_steps_per_second": 24.319,
2167
+ "epoch": 97.82608695652173,
2168
+ "step": 2250
2169
+ },
2170
+ {
2171
+ "SWA_loss": 0.74998939037323,
2172
+ "SWA_runtime": 0.4166,
2173
+ "SWA_samples_per_second": 24.004,
2174
+ "SWA_steps_per_second": 24.004,
2175
+ "epoch": 97.82608695652173,
2176
+ "step": 2250
2177
+ },
2178
+ {
2179
+ "EMA_loss": 0.8602108955383301,
2180
+ "EMA_runtime": 0.398,
2181
+ "EMA_samples_per_second": 25.124,
2182
+ "EMA_steps_per_second": 25.124,
2183
+ "epoch": 97.82608695652173,
2184
+ "step": 2250
2185
  }
2186
  ],
2187
  "logging_steps": 10,
 
2201
  "attributes": {}
2202
  }
2203
  },
2204
+ "total_flos": 5.800933883385446e+16,
2205
  "train_batch_size": 4,
2206
  "trial_name": null,
2207
  "trial_params": null