irishprancer committed on
Commit
c67bafd
·
verified ·
1 Parent(s): 099dd15

Training in progress, step 3300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d63441ec3a969a74407420396fbf80d70a54603fc26523cf80a059be318bdc6
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:561dd93e7200e620b3a628a98f78b77e49214dc77820bd53e01765ece4e79aaf
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8341a25de57f92cfd4f595bd362441a1aa28e0e40a28c5233a174e404e93cb3
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2fbfde6e57429b12b8a8888f9861f253842d30f7969950972c18e96feefbcc7
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dc11cf7bbf295ee9c52e4bc96c7945f90dee5f465d4b3d8a5908a292cedccce
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c26e3773f4be8664a2594f025c73a5f9434f857a45f46fc072657f1fdefb7000
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3561b109706461e913d6181bf1abc2a9b68bea4d15e3fa953484e4d068be280b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a0ab9d5b0198a16acdaf1c9f1e4c57811cdaac3c11a1070ce9660ad9c246b9d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 136.95652173913044,
5
  "eval_steps": 150,
6
- "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3052,6 +3052,151 @@
3052
  "EMA_steps_per_second": 25.577,
3053
  "epoch": 136.95652173913044,
3054
  "step": 3150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3055
  }
3056
  ],
3057
  "logging_steps": 10,
@@ -3071,7 +3216,7 @@
3071
  "attributes": {}
3072
  }
3073
  },
3074
- "total_flos": 8.121206262826598e+16,
3075
  "train_batch_size": 4,
3076
  "trial_name": null,
3077
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 143.47826086956522,
5
  "eval_steps": 150,
6
+ "global_step": 3300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3052
  "EMA_steps_per_second": 25.577,
3053
  "epoch": 136.95652173913044,
3054
  "step": 3150
3055
+ },
3056
+ {
3057
+ "epoch": 137.3913043478261,
3058
+ "grad_norm": 1.8168175220489502,
3059
+ "learning_rate": 3.90958477702786e-06,
3060
+ "loss": 0.2772,
3061
+ "step": 3160
3062
+ },
3063
+ {
3064
+ "epoch": 137.82608695652175,
3065
+ "grad_norm": 1.9455727338790894,
3066
+ "learning_rate": 3.909556495054918e-06,
3067
+ "loss": 0.235,
3068
+ "step": 3170
3069
+ },
3070
+ {
3071
+ "epoch": 138.2608695652174,
3072
+ "grad_norm": 1.652616024017334,
3073
+ "learning_rate": 3.9095274383754535e-06,
3074
+ "loss": 0.2271,
3075
+ "step": 3180
3076
+ },
3077
+ {
3078
+ "epoch": 138.69565217391303,
3079
+ "grad_norm": 2.0651702880859375,
3080
+ "learning_rate": 3.9094976070009825e-06,
3081
+ "loss": 0.275,
3082
+ "step": 3190
3083
+ },
3084
+ {
3085
+ "epoch": 139.1304347826087,
3086
+ "grad_norm": 1.3532943725585938,
3087
+ "learning_rate": 3.90946700094333e-06,
3088
+ "loss": 0.2252,
3089
+ "step": 3200
3090
+ },
3091
+ {
3092
+ "epoch": 139.56521739130434,
3093
+ "grad_norm": 1.9652019739151,
3094
+ "learning_rate": 3.909435620214626e-06,
3095
+ "loss": 0.2701,
3096
+ "step": 3210
3097
+ },
3098
+ {
3099
+ "epoch": 140.0,
3100
+ "grad_norm": 2.921708106994629,
3101
+ "learning_rate": 3.909403464827308e-06,
3102
+ "loss": 0.2301,
3103
+ "step": 3220
3104
+ },
3105
+ {
3106
+ "epoch": 140.43478260869566,
3107
+ "grad_norm": 2.249617576599121,
3108
+ "learning_rate": 3.909370534794125e-06,
3109
+ "loss": 0.2311,
3110
+ "step": 3230
3111
+ },
3112
+ {
3113
+ "epoch": 140.8695652173913,
3114
+ "grad_norm": 1.7195583581924438,
3115
+ "learning_rate": 3.9093368301281256e-06,
3116
+ "loss": 0.238,
3117
+ "step": 3240
3118
+ },
3119
+ {
3120
+ "epoch": 141.30434782608697,
3121
+ "grad_norm": 1.9796561002731323,
3122
+ "learning_rate": 3.909302350842671e-06,
3123
+ "loss": 0.2368,
3124
+ "step": 3250
3125
+ },
3126
+ {
3127
+ "epoch": 141.7391304347826,
3128
+ "grad_norm": 2.028313636779785,
3129
+ "learning_rate": 3.909267096951428e-06,
3130
+ "loss": 0.2492,
3131
+ "step": 3260
3132
+ },
3133
+ {
3134
+ "epoch": 142.17391304347825,
3135
+ "grad_norm": 1.7353265285491943,
3136
+ "learning_rate": 3.90923106846837e-06,
3137
+ "loss": 0.2578,
3138
+ "step": 3270
3139
+ },
3140
+ {
3141
+ "epoch": 142.6086956521739,
3142
+ "grad_norm": 2.5716023445129395,
3143
+ "learning_rate": 3.9091942654077765e-06,
3144
+ "loss": 0.2606,
3145
+ "step": 3280
3146
+ },
3147
+ {
3148
+ "epoch": 143.04347826086956,
3149
+ "grad_norm": 1.2427494525909424,
3150
+ "learning_rate": 3.9091566877842355e-06,
3151
+ "loss": 0.2151,
3152
+ "step": 3290
3153
+ },
3154
+ {
3155
+ "epoch": 143.47826086956522,
3156
+ "grad_norm": 1.797579288482666,
3157
+ "learning_rate": 3.9091183356126425e-06,
3158
+ "loss": 0.2476,
3159
+ "step": 3300
3160
+ },
3161
+ {
3162
+ "epoch": 143.47826086956522,
3163
+ "eval_loss": 0.9522649049758911,
3164
+ "eval_runtime": 0.4144,
3165
+ "eval_samples_per_second": 24.129,
3166
+ "eval_steps_per_second": 24.129,
3167
+ "step": 3300
3168
+ },
3169
+ {
3170
+ "Start_State_loss": 0.861186683177948,
3171
+ "Start_State_runtime": 0.4101,
3172
+ "Start_State_samples_per_second": 24.382,
3173
+ "Start_State_steps_per_second": 24.382,
3174
+ "epoch": 143.47826086956522,
3175
+ "step": 3300
3176
+ },
3177
+ {
3178
+ "Raw_Model_loss": 0.9522649049758911,
3179
+ "Raw_Model_runtime": 0.4058,
3180
+ "Raw_Model_samples_per_second": 24.644,
3181
+ "Raw_Model_steps_per_second": 24.644,
3182
+ "epoch": 143.47826086956522,
3183
+ "step": 3300
3184
+ },
3185
+ {
3186
+ "SWA_loss": 0.7822953462600708,
3187
+ "SWA_runtime": 0.4238,
3188
+ "SWA_samples_per_second": 23.595,
3189
+ "SWA_steps_per_second": 23.595,
3190
+ "epoch": 143.47826086956522,
3191
+ "step": 3300
3192
+ },
3193
+ {
3194
+ "EMA_loss": 0.8601328134536743,
3195
+ "EMA_runtime": 0.4018,
3196
+ "EMA_samples_per_second": 24.887,
3197
+ "EMA_steps_per_second": 24.887,
3198
+ "epoch": 143.47826086956522,
3199
+ "step": 3300
3200
  }
3201
  ],
3202
  "logging_steps": 10,
 
3216
  "attributes": {}
3217
  }
3218
  },
3219
+ "total_flos": 8.51203574828974e+16,
3220
  "train_batch_size": 4,
3221
  "trial_name": null,
3222
  "trial_params": null