Darayut commited on
Commit
c6c61be
·
verified ·
1 Parent(s): d646082

Training in progress, step 40000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e646557d1714b03c01fb412f6a1095d6f0b27dd1db5951c862dfad4d6c29751
3
  size 109474784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bfe6baaacbaddd015a12bab49156e00dc4c1e160550b542d0e226325ddf5db2
3
  size 109474784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4228557dbc6e8fb4064941b1ddaa475b330a42e7b6431885dc21d12506dd651a
3
  size 219167566
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6ac30952c6f539d3da6544d4a2a861e02a244be12cf1dcce8debe764ab885bf
3
  size 219167566
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81e14e48fa7fa467aea82a41053dff7dcc24f23aa7b927758568ed7e573f1d8f
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cf59aec563c848c9c9be96d1ec5da07a8b242f22b4d8d9c6b8147d291c7066
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d59504241658e218f41099a9cf64bc628eab9f2f1191eb39337b946e3c461384
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0387c1f4f109c89a4181c525b44fe19c547102aff44e31602a2e690f07d152e2
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88e6c307192b2d8ef1913d03dcc8581441beba1d5797a4977890061747277b7d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45dc5aad56aa99b2c784b45cd965c9e0655df0c73e21270b60c989eff8ce39c
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.107725,
6
  "eval_steps": 1000,
7
- "global_step": 39000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3089,6 +3089,85 @@
3089
  "eval_samples_per_second": 6.558,
3090
  "eval_steps_per_second": 0.82,
3091
  "step": 39000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3092
  }
3093
  ],
3094
  "logging_steps": 100,
@@ -3103,12 +3182,12 @@
3103
  "should_evaluate": false,
3104
  "should_log": false,
3105
  "should_save": true,
3106
- "should_training_stop": false
3107
  },
3108
  "attributes": {}
3109
  }
3110
  },
3111
- "total_flos": 4.02696903019966e+19,
3112
  "train_batch_size": 8,
3113
  "trial_name": null,
3114
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 4.132725,
6
  "eval_steps": 1000,
7
+ "global_step": 40000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3089
  "eval_samples_per_second": 6.558,
3090
  "eval_steps_per_second": 0.82,
3091
  "step": 39000
3092
+ },
3093
+ {
3094
+ "epoch": 4.110225,
3095
+ "grad_norm": 0.23436564207077026,
3096
+ "learning_rate": 6.581721081324321e-08,
3097
+ "loss": 0.1377,
3098
+ "step": 39100
3099
+ },
3100
+ {
3101
+ "epoch": 4.112725,
3102
+ "grad_norm": 0.46249279379844666,
3103
+ "learning_rate": 5.20229406119066e-08,
3104
+ "loss": 0.1134,
3105
+ "step": 39200
3106
+ },
3107
+ {
3108
+ "epoch": 4.115225,
3109
+ "grad_norm": 0.185123011469841,
3110
+ "learning_rate": 3.9847508890114574e-08,
3111
+ "loss": 0.0978,
3112
+ "step": 39300
3113
+ },
3114
+ {
3115
+ "epoch": 4.117725,
3116
+ "grad_norm": 0.8050533533096313,
3117
+ "learning_rate": 2.9291705694176052e-08,
3118
+ "loss": 0.1052,
3119
+ "step": 39400
3120
+ },
3121
+ {
3122
+ "epoch": 4.120225,
3123
+ "grad_norm": 0.2137298583984375,
3124
+ "learning_rate": 2.0356215975037006e-08,
3125
+ "loss": 0.108,
3126
+ "step": 39500
3127
+ },
3128
+ {
3129
+ "epoch": 4.122725,
3130
+ "grad_norm": 0.22636649012565613,
3131
+ "learning_rate": 1.3041619543818773e-08,
3132
+ "loss": 0.1081,
3133
+ "step": 39600
3134
+ },
3135
+ {
3136
+ "epoch": 4.125225,
3137
+ "grad_norm": 0.8003067374229431,
3138
+ "learning_rate": 7.34839103420093e-09,
3139
+ "loss": 0.1003,
3140
+ "step": 39700
3141
+ },
3142
+ {
3143
+ "epoch": 4.127725,
3144
+ "grad_norm": 0.31316348910331726,
3145
+ "learning_rate": 3.2768998716237177e-09,
3146
+ "loss": 0.0946,
3147
+ "step": 39800
3148
+ },
3149
+ {
3150
+ "epoch": 4.130225,
3151
+ "grad_norm": 0.1979643851518631,
3152
+ "learning_rate": 8.274102493155411e-10,
3153
+ "loss": 0.1167,
3154
+ "step": 39900
3155
+ },
3156
+ {
3157
+ "epoch": 4.132725,
3158
+ "grad_norm": 0.2671285569667816,
3159
+ "learning_rate": 8.111114835340772e-14,
3160
+ "loss": 0.1142,
3161
+ "step": 40000
3162
+ },
3163
+ {
3164
+ "epoch": 4.132725,
3165
+ "eval_loss": 0.2432384490966797,
3166
+ "eval_mean_iou": 0.38175953658272593,
3167
+ "eval_runtime": 22.4802,
3168
+ "eval_samples_per_second": 8.897,
3169
+ "eval_steps_per_second": 1.112,
3170
+ "step": 40000
3171
  }
3172
  ],
3173
  "logging_steps": 100,
 
3182
  "should_evaluate": false,
3183
  "should_log": false,
3184
  "should_save": true,
3185
+ "should_training_stop": true
3186
  },
3187
  "attributes": {}
3188
  }
3189
  },
3190
+ "total_flos": 4.130233913719967e+19,
3191
  "train_batch_size": 8,
3192
  "trial_name": null,
3193
  "trial_params": null