Sabbir772 commited on
Commit
a45fb67
·
verified ·
1 Parent(s): abe4803

Training in progress, epoch 27, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ff908266d6d71858dfb9a174963f66db89a006659d5d2805fe404646128e987
3
  size 990185320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727012ff11d243aa3f50de8258ad959513310b901aef61c0f3c57fba9b72cea7
3
  size 990185320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27318dd665b2a337fcb4e1b1dc90f1e6f0d153a1e0cb6777337f04cd4fecc0b4
3
  size 1980541387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc975ad13f0fa89a81b5fa79d6bf3b2541171d36c7eb13e67661a24c57543b1
3
  size 1980541387
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:285ff2ae5a404cbf796cda2dc5c3c268099da23ad4443ee6155b34b1c082a741
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a55fd7ac78bf4b5e6b13f1c5a1f5fb7258744c10b012de8c0c0b5edc12e58da
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5c961e41c178c68f65552114a158b8cd6a4f2860bbc78cf4602c6ea2ecddc79
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84c0eceb8506ba57b63c709b65cc1be3871eb480c744855060e8151b4276a67
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 26.0,
6
  "eval_steps": 500,
7
- "global_step": 40014,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3016,6 +3016,119 @@
3016
  "eval_samples_per_second": 22.077,
3017
  "eval_steps_per_second": 2.76,
3018
  "step": 40014
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3019
  }
3020
  ],
3021
  "logging_steps": 100,
@@ -3035,7 +3148,7 @@
3035
  "attributes": {}
3036
  }
3037
  },
3038
- "total_flos": 5.509646094119731e+16,
3039
  "train_batch_size": 8,
3040
  "trial_name": null,
3041
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 27.0,
6
  "eval_steps": 500,
7
+ "global_step": 41553,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3016
  "eval_samples_per_second": 22.077,
3017
  "eval_steps_per_second": 2.76,
3018
  "step": 40014
3019
+ },
3020
+ {
3021
+ "epoch": 26.055880441845353,
3022
+ "grad_norm": 4.097437858581543,
3023
+ "learning_rate": 6.574615551223738e-06,
3024
+ "loss": 1.3986,
3025
+ "step": 40100
3026
+ },
3027
+ {
3028
+ "epoch": 26.120857699805068,
3029
+ "grad_norm": 6.5821123123168945,
3030
+ "learning_rate": 6.466320121290882e-06,
3031
+ "loss": 1.4878,
3032
+ "step": 40200
3033
+ },
3034
+ {
3035
+ "epoch": 26.185834957764783,
3036
+ "grad_norm": 3.4001429080963135,
3037
+ "learning_rate": 6.3580246913580246e-06,
3038
+ "loss": 1.498,
3039
+ "step": 40300
3040
+ },
3041
+ {
3042
+ "epoch": 26.250812215724498,
3043
+ "grad_norm": 4.594339370727539,
3044
+ "learning_rate": 6.249729261425168e-06,
3045
+ "loss": 1.4616,
3046
+ "step": 40400
3047
+ },
3048
+ {
3049
+ "epoch": 26.31578947368421,
3050
+ "grad_norm": 7.947396755218506,
3051
+ "learning_rate": 6.141433831492311e-06,
3052
+ "loss": 1.4703,
3053
+ "step": 40500
3054
+ },
3055
+ {
3056
+ "epoch": 26.380766731643924,
3057
+ "grad_norm": 4.686864376068115,
3058
+ "learning_rate": 6.033138401559455e-06,
3059
+ "loss": 1.4843,
3060
+ "step": 40600
3061
+ },
3062
+ {
3063
+ "epoch": 26.44574398960364,
3064
+ "grad_norm": 4.843585014343262,
3065
+ "learning_rate": 5.924842971626597e-06,
3066
+ "loss": 1.4536,
3067
+ "step": 40700
3068
+ },
3069
+ {
3070
+ "epoch": 26.510721247563353,
3071
+ "grad_norm": 3.5781548023223877,
3072
+ "learning_rate": 5.816547541693741e-06,
3073
+ "loss": 1.4089,
3074
+ "step": 40800
3075
+ },
3076
+ {
3077
+ "epoch": 26.575698505523068,
3078
+ "grad_norm": 4.236611843109131,
3079
+ "learning_rate": 5.708252111760884e-06,
3080
+ "loss": 1.4653,
3081
+ "step": 40900
3082
+ },
3083
+ {
3084
+ "epoch": 26.640675763482783,
3085
+ "grad_norm": 4.144320011138916,
3086
+ "learning_rate": 5.599956681828028e-06,
3087
+ "loss": 1.4765,
3088
+ "step": 41000
3089
+ },
3090
+ {
3091
+ "epoch": 26.705653021442494,
3092
+ "grad_norm": 3.518115997314453,
3093
+ "learning_rate": 5.49166125189517e-06,
3094
+ "loss": 1.447,
3095
+ "step": 41100
3096
+ },
3097
+ {
3098
+ "epoch": 26.77063027940221,
3099
+ "grad_norm": 4.764077663421631,
3100
+ "learning_rate": 5.383365821962314e-06,
3101
+ "loss": 1.4214,
3102
+ "step": 41200
3103
+ },
3104
+ {
3105
+ "epoch": 26.835607537361923,
3106
+ "grad_norm": 9.616331100463867,
3107
+ "learning_rate": 5.275070392029457e-06,
3108
+ "loss": 1.4519,
3109
+ "step": 41300
3110
+ },
3111
+ {
3112
+ "epoch": 26.900584795321638,
3113
+ "grad_norm": 3.7429463863372803,
3114
+ "learning_rate": 5.1667749620966e-06,
3115
+ "loss": 1.4603,
3116
+ "step": 41400
3117
+ },
3118
+ {
3119
+ "epoch": 26.965562053281353,
3120
+ "grad_norm": 5.070949077606201,
3121
+ "learning_rate": 5.058479532163743e-06,
3122
+ "loss": 1.4638,
3123
+ "step": 41500
3124
+ },
3125
+ {
3126
+ "epoch": 27.0,
3127
+ "eval_loss": 1.3732537031173706,
3128
+ "eval_runtime": 61.9052,
3129
+ "eval_samples_per_second": 22.098,
3130
+ "eval_steps_per_second": 2.762,
3131
+ "step": 41553
3132
  }
3133
  ],
3134
  "logging_steps": 100,
 
3148
  "attributes": {}
3149
  }
3150
  },
3151
+ "total_flos": 5.931144958338662e+16,
3152
  "train_batch_size": 8,
3153
  "trial_name": null,
3154
  "trial_params": null