error577 committed on
Commit
f4d519b
·
verified ·
1 Parent(s): 2c63544

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86b3b63be4448755c0009b69660d5b9414363b669c5d657fda85d4386ba0f2dc
3
  size 957942768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efc1163b23eb593a8578929e2d130f96856b97846e4d1b51a959c1dabb4bf922
3
  size 957942768
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:972ec02ed85fb0b1e514b3e282beecbfbea88f1cf7554bba444c742a4f7b220f
3
  size 487013236
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ca1a8806e2d359746c96a8505c0db43cba130ceb07ec58151102bc711638ccf
3
  size 487013236
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:460992d1fc328a7355a0075c0ee699b21b745584f7f8824117265d427680a5f8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f2232a20060f4e34ec2de4e5c7dd055420041fd4b32536fbe812cf646a366e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0652429c23558e0e0a102ee7a03bf6b258b24fad4104041d1c89929993cab61a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded3d032047d899801b61ad8c24ff5f0dcb962a1322be38b449c3ef1152b14be
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.050189256668091,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.22475066722854334,
5
  "eval_steps": 50,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2879,6 +2879,364 @@
2879
  "eval_samples_per_second": 8.484,
2880
  "eval_steps_per_second": 8.484,
2881
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2882
  }
2883
  ],
2884
  "logging_steps": 1,
@@ -2893,7 +3251,7 @@
2893
  "early_stopping_threshold": 0.0
2894
  },
2895
  "attributes": {
2896
- "early_stopping_patience_counter": 2
2897
  }
2898
  },
2899
  "TrainerControl": {
@@ -2902,12 +3260,12 @@
2902
  "should_evaluate": false,
2903
  "should_log": false,
2904
  "should_save": true,
2905
- "should_training_stop": false
2906
  },
2907
  "attributes": {}
2908
  }
2909
  },
2910
- "total_flos": 5.92624150904832e+16,
2911
  "train_batch_size": 1,
2912
  "trial_name": null,
2913
  "trial_params": null
 
1
  {
2
  "best_metric": 2.050189256668091,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.2528445006321112,
5
  "eval_steps": 50,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2879
  "eval_samples_per_second": 8.484,
2880
  "eval_steps_per_second": 8.484,
2881
  "step": 400
2882
+ },
2883
+ {
2884
+ "epoch": 0.2253125438966147,
2885
+ "grad_norm": 1.295182704925537,
2886
+ "learning_rate": 0.0002960296947671209,
2887
+ "loss": 2.1389,
2888
+ "step": 401
2889
+ },
2890
+ {
2891
+ "epoch": 0.22587442056468604,
2892
+ "grad_norm": 1.8076077699661255,
2893
+ "learning_rate": 0.00029600945097446043,
2894
+ "loss": 2.3872,
2895
+ "step": 402
2896
+ },
2897
+ {
2898
+ "epoch": 0.2264362972327574,
2899
+ "grad_norm": 1.4435436725616455,
2900
+ "learning_rate": 0.0002959891563991561,
2901
+ "loss": 1.7958,
2902
+ "step": 403
2903
+ },
2904
+ {
2905
+ "epoch": 0.22699817390082877,
2906
+ "grad_norm": 1.2692290544509888,
2907
+ "learning_rate": 0.0002959688110482665,
2908
+ "loss": 2.1162,
2909
+ "step": 404
2910
+ },
2911
+ {
2912
+ "epoch": 0.22756005056890014,
2913
+ "grad_norm": 1.4647787809371948,
2914
+ "learning_rate": 0.0002959484149288678,
2915
+ "loss": 2.0282,
2916
+ "step": 405
2917
+ },
2918
+ {
2919
+ "epoch": 0.22812192723697147,
2920
+ "grad_norm": 2.5118775367736816,
2921
+ "learning_rate": 0.0002959279680480538,
2922
+ "loss": 1.9518,
2923
+ "step": 406
2924
+ },
2925
+ {
2926
+ "epoch": 0.22868380390504284,
2927
+ "grad_norm": 1.2008159160614014,
2928
+ "learning_rate": 0.0002959074704129361,
2929
+ "loss": 1.9314,
2930
+ "step": 407
2931
+ },
2932
+ {
2933
+ "epoch": 0.2292456805731142,
2934
+ "grad_norm": 1.215867519378662,
2935
+ "learning_rate": 0.00029588692203064376,
2936
+ "loss": 1.6709,
2937
+ "step": 408
2938
+ },
2939
+ {
2940
+ "epoch": 0.22980755724118557,
2941
+ "grad_norm": 1.3331108093261719,
2942
+ "learning_rate": 0.00029586632290832366,
2943
+ "loss": 1.9617,
2944
+ "step": 409
2945
+ },
2946
+ {
2947
+ "epoch": 0.2303694339092569,
2948
+ "grad_norm": 1.653433918952942,
2949
+ "learning_rate": 0.0002958456730531403,
2950
+ "loss": 1.6985,
2951
+ "step": 410
2952
+ },
2953
+ {
2954
+ "epoch": 0.23093131057732827,
2955
+ "grad_norm": 1.3687360286712646,
2956
+ "learning_rate": 0.0002958249724722756,
2957
+ "loss": 2.1661,
2958
+ "step": 411
2959
+ },
2960
+ {
2961
+ "epoch": 0.23149318724539963,
2962
+ "grad_norm": 4.9779205322265625,
2963
+ "learning_rate": 0.00029580422117292946,
2964
+ "loss": 1.8247,
2965
+ "step": 412
2966
+ },
2967
+ {
2968
+ "epoch": 0.232055063913471,
2969
+ "grad_norm": 1.7227343320846558,
2970
+ "learning_rate": 0.0002957834191623191,
2971
+ "loss": 2.2165,
2972
+ "step": 413
2973
+ },
2974
+ {
2975
+ "epoch": 0.23261694058154236,
2976
+ "grad_norm": 1.2426742315292358,
2977
+ "learning_rate": 0.00029576256644767976,
2978
+ "loss": 1.8202,
2979
+ "step": 414
2980
+ },
2981
+ {
2982
+ "epoch": 0.2331788172496137,
2983
+ "grad_norm": 1.4988555908203125,
2984
+ "learning_rate": 0.00029574166303626394,
2985
+ "loss": 1.9089,
2986
+ "step": 415
2987
+ },
2988
+ {
2989
+ "epoch": 0.23374069391768507,
2990
+ "grad_norm": 1.558443546295166,
2991
+ "learning_rate": 0.00029572070893534193,
2992
+ "loss": 1.7309,
2993
+ "step": 416
2994
+ },
2995
+ {
2996
+ "epoch": 0.23430257058575643,
2997
+ "grad_norm": 1.3139750957489014,
2998
+ "learning_rate": 0.00029569970415220173,
2999
+ "loss": 1.8066,
3000
+ "step": 417
3001
+ },
3002
+ {
3003
+ "epoch": 0.2348644472538278,
3004
+ "grad_norm": 1.653947114944458,
3005
+ "learning_rate": 0.0002956786486941488,
3006
+ "loss": 2.1868,
3007
+ "step": 418
3008
+ },
3009
+ {
3010
+ "epoch": 0.23542632392189913,
3011
+ "grad_norm": 1.4625102281570435,
3012
+ "learning_rate": 0.0002956575425685064,
3013
+ "loss": 2.0744,
3014
+ "step": 419
3015
+ },
3016
+ {
3017
+ "epoch": 0.2359882005899705,
3018
+ "grad_norm": 1.7257875204086304,
3019
+ "learning_rate": 0.0002956363857826152,
3020
+ "loss": 1.8357,
3021
+ "step": 420
3022
+ },
3023
+ {
3024
+ "epoch": 0.23655007725804186,
3025
+ "grad_norm": 1.5024346113204956,
3026
+ "learning_rate": 0.00029561517834383373,
3027
+ "loss": 2.0237,
3028
+ "step": 421
3029
+ },
3030
+ {
3031
+ "epoch": 0.23711195392611323,
3032
+ "grad_norm": 1.6755564212799072,
3033
+ "learning_rate": 0.0002955939202595379,
3034
+ "loss": 1.8129,
3035
+ "step": 422
3036
+ },
3037
+ {
3038
+ "epoch": 0.23767383059418457,
3039
+ "grad_norm": 1.4014532566070557,
3040
+ "learning_rate": 0.0002955726115371215,
3041
+ "loss": 2.0515,
3042
+ "step": 423
3043
+ },
3044
+ {
3045
+ "epoch": 0.23823570726225593,
3046
+ "grad_norm": 1.712101936340332,
3047
+ "learning_rate": 0.0002955512521839956,
3048
+ "loss": 2.1661,
3049
+ "step": 424
3050
+ },
3051
+ {
3052
+ "epoch": 0.2387975839303273,
3053
+ "grad_norm": 1.6710323095321655,
3054
+ "learning_rate": 0.00029552984220758925,
3055
+ "loss": 2.1425,
3056
+ "step": 425
3057
+ },
3058
+ {
3059
+ "epoch": 0.23935946059839866,
3060
+ "grad_norm": 2.7601799964904785,
3061
+ "learning_rate": 0.0002955083816153488,
3062
+ "loss": 2.3031,
3063
+ "step": 426
3064
+ },
3065
+ {
3066
+ "epoch": 0.23992133726647,
3067
+ "grad_norm": 2.1862637996673584,
3068
+ "learning_rate": 0.00029548687041473836,
3069
+ "loss": 1.8812,
3070
+ "step": 427
3071
+ },
3072
+ {
3073
+ "epoch": 0.24048321393454136,
3074
+ "grad_norm": 1.7939826250076294,
3075
+ "learning_rate": 0.0002954653086132396,
3076
+ "loss": 1.802,
3077
+ "step": 428
3078
+ },
3079
+ {
3080
+ "epoch": 0.24104509060261273,
3081
+ "grad_norm": 1.6978318691253662,
3082
+ "learning_rate": 0.0002954436962183518,
3083
+ "loss": 1.9085,
3084
+ "step": 429
3085
+ },
3086
+ {
3087
+ "epoch": 0.2416069672706841,
3088
+ "grad_norm": 1.7141228914260864,
3089
+ "learning_rate": 0.00029542203323759187,
3090
+ "loss": 1.8558,
3091
+ "step": 430
3092
+ },
3093
+ {
3094
+ "epoch": 0.24216884393875546,
3095
+ "grad_norm": 1.9624735116958618,
3096
+ "learning_rate": 0.0002954003196784942,
3097
+ "loss": 1.7955,
3098
+ "step": 431
3099
+ },
3100
+ {
3101
+ "epoch": 0.2427307206068268,
3102
+ "grad_norm": 1.9632998704910278,
3103
+ "learning_rate": 0.00029537855554861097,
3104
+ "loss": 2.0489,
3105
+ "step": 432
3106
+ },
3107
+ {
3108
+ "epoch": 0.24329259727489816,
3109
+ "grad_norm": 1.7335871458053589,
3110
+ "learning_rate": 0.0002953567408555117,
3111
+ "loss": 1.9226,
3112
+ "step": 433
3113
+ },
3114
+ {
3115
+ "epoch": 0.24385447394296952,
3116
+ "grad_norm": 1.5185022354125977,
3117
+ "learning_rate": 0.00029533487560678365,
3118
+ "loss": 2.1437,
3119
+ "step": 434
3120
+ },
3121
+ {
3122
+ "epoch": 0.2444163506110409,
3123
+ "grad_norm": 1.7918310165405273,
3124
+ "learning_rate": 0.00029531295981003174,
3125
+ "loss": 1.6909,
3126
+ "step": 435
3127
+ },
3128
+ {
3129
+ "epoch": 0.24497822727911223,
3130
+ "grad_norm": 1.853018879890442,
3131
+ "learning_rate": 0.00029529099347287826,
3132
+ "loss": 2.161,
3133
+ "step": 436
3134
+ },
3135
+ {
3136
+ "epoch": 0.2455401039471836,
3137
+ "grad_norm": 1.6407794952392578,
3138
+ "learning_rate": 0.00029526897660296316,
3139
+ "loss": 2.2005,
3140
+ "step": 437
3141
+ },
3142
+ {
3143
+ "epoch": 0.24610198061525496,
3144
+ "grad_norm": 1.9181418418884277,
3145
+ "learning_rate": 0.00029524690920794416,
3146
+ "loss": 2.0464,
3147
+ "step": 438
3148
+ },
3149
+ {
3150
+ "epoch": 0.24666385728332632,
3151
+ "grad_norm": 1.7185394763946533,
3152
+ "learning_rate": 0.0002952247912954962,
3153
+ "loss": 1.8386,
3154
+ "step": 439
3155
+ },
3156
+ {
3157
+ "epoch": 0.24722573395139766,
3158
+ "grad_norm": 2.1002471446990967,
3159
+ "learning_rate": 0.000295202622873312,
3160
+ "loss": 2.0351,
3161
+ "step": 440
3162
+ },
3163
+ {
3164
+ "epoch": 0.24778761061946902,
3165
+ "grad_norm": 2.016331672668457,
3166
+ "learning_rate": 0.00029518040394910195,
3167
+ "loss": 2.0552,
3168
+ "step": 441
3169
+ },
3170
+ {
3171
+ "epoch": 0.2483494872875404,
3172
+ "grad_norm": 1.7284388542175293,
3173
+ "learning_rate": 0.00029515813453059376,
3174
+ "loss": 1.8612,
3175
+ "step": 442
3176
+ },
3177
+ {
3178
+ "epoch": 0.24891136395561175,
3179
+ "grad_norm": 2.032498359680176,
3180
+ "learning_rate": 0.00029513581462553285,
3181
+ "loss": 2.1246,
3182
+ "step": 443
3183
+ },
3184
+ {
3185
+ "epoch": 0.2494732406236831,
3186
+ "grad_norm": 2.0629770755767822,
3187
+ "learning_rate": 0.0002951134442416822,
3188
+ "loss": 2.165,
3189
+ "step": 444
3190
+ },
3191
+ {
3192
+ "epoch": 0.2500351172917545,
3193
+ "grad_norm": 3.081624746322632,
3194
+ "learning_rate": 0.00029509102338682225,
3195
+ "loss": 2.3962,
3196
+ "step": 445
3197
+ },
3198
+ {
3199
+ "epoch": 0.2505969939598258,
3200
+ "grad_norm": 1.9752740859985352,
3201
+ "learning_rate": 0.0002950685520687511,
3202
+ "loss": 2.5579,
3203
+ "step": 446
3204
+ },
3205
+ {
3206
+ "epoch": 0.25115887062789716,
3207
+ "grad_norm": 2.6555721759796143,
3208
+ "learning_rate": 0.0002950460302952844,
3209
+ "loss": 2.335,
3210
+ "step": 447
3211
+ },
3212
+ {
3213
+ "epoch": 0.25172074729596855,
3214
+ "grad_norm": 2.179882526397705,
3215
+ "learning_rate": 0.00029502345807425523,
3216
+ "loss": 2.1026,
3217
+ "step": 448
3218
+ },
3219
+ {
3220
+ "epoch": 0.2522826239640399,
3221
+ "grad_norm": 2.176938533782959,
3222
+ "learning_rate": 0.0002950008354135143,
3223
+ "loss": 1.9971,
3224
+ "step": 449
3225
+ },
3226
+ {
3227
+ "epoch": 0.2528445006321112,
3228
+ "grad_norm": 4.19490385055542,
3229
+ "learning_rate": 0.00029497816232092997,
3230
+ "loss": 2.6225,
3231
+ "step": 450
3232
+ },
3233
+ {
3234
+ "epoch": 0.2528445006321112,
3235
+ "eval_loss": 2.0559065341949463,
3236
+ "eval_runtime": 68.6035,
3237
+ "eval_samples_per_second": 8.484,
3238
+ "eval_steps_per_second": 8.484,
3239
+ "step": 450
3240
  }
3241
  ],
3242
  "logging_steps": 1,
 
3251
  "early_stopping_threshold": 0.0
3252
  },
3253
  "attributes": {
3254
+ "early_stopping_patience_counter": 3
3255
  }
3256
  },
3257
  "TrainerControl": {
 
3260
  "should_evaluate": false,
3261
  "should_log": false,
3262
  "should_save": true,
3263
+ "should_training_stop": true
3264
  },
3265
  "attributes": {}
3266
  }
3267
  },
3268
+ "total_flos": 6.66702169767936e+16,
3269
  "train_batch_size": 1,
3270
  "trial_name": null,
3271
  "trial_params": null