irishprancer committed on
Commit
e09dfe0
·
verified ·
1 Parent(s): 6a87c5e

Training in progress, step 3300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6140a82871947a5053ea753f4b4e7478652f03aaa4f2bffc3efce40f5b8a2acf
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32f20e5a113f0ce12c0b08ff61de538680ca692bc68665ee0332272cc0d8f53c
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e34c33547a03894cfd3b0e074d18c7955c94b3793b595b0fb33caf001f9e934
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:080d08b596e97f811e550148cdd9224bf440ad30a23acba52af3d08c0021f9aa
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dc11cf7bbf295ee9c52e4bc96c7945f90dee5f465d4b3d8a5908a292cedccce
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26e3773f4be8664a2594f025c73a5f9434f857a45f46fc072657f1fdefb7000
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddcc6f5bcca0b4700eaf14cba204c25dc453658df367f27efd599da5c59cda0d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:070547fc77391e346b90917e21c08178811df2dccd6cf65dcc04961ee24e1903
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 136.95652173913044,
5
  "eval_steps": 150,
6
- "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3052,6 +3052,151 @@
3052
  "EMA_steps_per_second": 25.899,
3053
  "epoch": 136.95652173913044,
3054
  "step": 3150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3055
  }
3056
  ],
3057
  "logging_steps": 10,
@@ -3071,7 +3216,7 @@
3071
  "attributes": {}
3072
  }
3073
  },
3074
- "total_flos": 8.121206262826598e+16,
3075
  "train_batch_size": 4,
3076
  "trial_name": null,
3077
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 143.47826086956522,
5
  "eval_steps": 150,
6
+ "global_step": 3300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3052
  "EMA_steps_per_second": 25.899,
3053
  "epoch": 136.95652173913044,
3054
  "step": 3150
3055
+ },
3056
+ {
3057
+ "epoch": 137.3913043478261,
3058
+ "grad_norm": 1.7837154865264893,
3059
+ "learning_rate": 3.938808916216111e-06,
3060
+ "loss": 0.2769,
3061
+ "step": 3160
3062
+ },
3063
+ {
3064
+ "epoch": 137.82608695652175,
3065
+ "grad_norm": 1.8921836614608765,
3066
+ "learning_rate": 3.938780422835473e-06,
3067
+ "loss": 0.2349,
3068
+ "step": 3170
3069
+ },
3070
+ {
3071
+ "epoch": 138.2608695652174,
3072
+ "grad_norm": 1.6770527362823486,
3073
+ "learning_rate": 3.9387511489573816e-06,
3074
+ "loss": 0.2269,
3075
+ "step": 3180
3076
+ },
3077
+ {
3078
+ "epoch": 138.69565217391303,
3079
+ "grad_norm": 2.0695013999938965,
3080
+ "learning_rate": 3.9387210945934395e-06,
3081
+ "loss": 0.2747,
3082
+ "step": 3190
3083
+ },
3084
+ {
3085
+ "epoch": 139.1304347826087,
3086
+ "grad_norm": 1.3567662239074707,
3087
+ "learning_rate": 3.938690259755561e-06,
3088
+ "loss": 0.2254,
3089
+ "step": 3200
3090
+ },
3091
+ {
3092
+ "epoch": 139.56521739130434,
3093
+ "grad_norm": 1.9828763008117676,
3094
+ "learning_rate": 3.938658644455966e-06,
3095
+ "loss": 0.2696,
3096
+ "step": 3210
3097
+ },
3098
+ {
3099
+ "epoch": 140.0,
3100
+ "grad_norm": 2.950700283050537,
3101
+ "learning_rate": 3.938626248707186e-06,
3102
+ "loss": 0.23,
3103
+ "step": 3220
3104
+ },
3105
+ {
3106
+ "epoch": 140.43478260869566,
3107
+ "grad_norm": 2.2718775272369385,
3108
+ "learning_rate": 3.938593072522064e-06,
3109
+ "loss": 0.2313,
3110
+ "step": 3230
3111
+ },
3112
+ {
3113
+ "epoch": 140.8695652173913,
3114
+ "grad_norm": 1.705852746963501,
3115
+ "learning_rate": 3.938559115913747e-06,
3116
+ "loss": 0.2377,
3117
+ "step": 3240
3118
+ },
3119
+ {
3120
+ "epoch": 141.30434782608697,
3121
+ "grad_norm": 1.9781115055084229,
3122
+ "learning_rate": 3.938524378895696e-06,
3123
+ "loss": 0.2365,
3124
+ "step": 3250
3125
+ },
3126
+ {
3127
+ "epoch": 141.7391304347826,
3128
+ "grad_norm": 1.9883968830108643,
3129
+ "learning_rate": 3.938488861481679e-06,
3130
+ "loss": 0.249,
3131
+ "step": 3260
3132
+ },
3133
+ {
3134
+ "epoch": 142.17391304347825,
3135
+ "grad_norm": 1.706252098083496,
3136
+ "learning_rate": 3.9384525636857744e-06,
3137
+ "loss": 0.2577,
3138
+ "step": 3270
3139
+ },
3140
+ {
3141
+ "epoch": 142.6086956521739,
3142
+ "grad_norm": 2.533005952835083,
3143
+ "learning_rate": 3.938415485522369e-06,
3144
+ "loss": 0.2606,
3145
+ "step": 3280
3146
+ },
3147
+ {
3148
+ "epoch": 143.04347826086956,
3149
+ "grad_norm": 1.2762919664382935,
3150
+ "learning_rate": 3.93837762700616e-06,
3151
+ "loss": 0.2149,
3152
+ "step": 3290
3153
+ },
3154
+ {
3155
+ "epoch": 143.47826086956522,
3156
+ "grad_norm": 1.8011665344238281,
3157
+ "learning_rate": 3.938338988152153e-06,
3158
+ "loss": 0.2475,
3159
+ "step": 3300
3160
+ },
3161
+ {
3162
+ "epoch": 143.47826086956522,
3163
+ "eval_loss": 0.9505108594894409,
3164
+ "eval_runtime": 0.4,
3165
+ "eval_samples_per_second": 25.0,
3166
+ "eval_steps_per_second": 25.0,
3167
+ "step": 3300
3168
+ },
3169
+ {
3170
+ "Start_State_loss": 0.8609819412231445,
3171
+ "Start_State_runtime": 0.4096,
3172
+ "Start_State_samples_per_second": 24.414,
3173
+ "Start_State_steps_per_second": 24.414,
3174
+ "epoch": 143.47826086956522,
3175
+ "step": 3300
3176
+ },
3177
+ {
3178
+ "Raw_Model_loss": 0.9505108594894409,
3179
+ "Raw_Model_runtime": 0.3921,
3180
+ "Raw_Model_samples_per_second": 25.502,
3181
+ "Raw_Model_steps_per_second": 25.502,
3182
+ "epoch": 143.47826086956522,
3183
+ "step": 3300
3184
+ },
3185
+ {
3186
+ "SWA_loss": 0.7819399833679199,
3187
+ "SWA_runtime": 0.3956,
3188
+ "SWA_samples_per_second": 25.275,
3189
+ "SWA_steps_per_second": 25.275,
3190
+ "epoch": 143.47826086956522,
3191
+ "step": 3300
3192
+ },
3193
+ {
3194
+ "EMA_loss": 0.859829306602478,
3195
+ "EMA_runtime": 0.3871,
3196
+ "EMA_samples_per_second": 25.834,
3197
+ "EMA_steps_per_second": 25.834,
3198
+ "epoch": 143.47826086956522,
3199
+ "step": 3300
3200
  }
3201
  ],
3202
  "logging_steps": 10,
 
3216
  "attributes": {}
3217
  }
3218
  },
3219
+ "total_flos": 8.51203574828974e+16,
3220
  "train_batch_size": 4,
3221
  "trial_name": null,
3222
  "trial_params": null