irishprancer commited on
Commit
cf47d24
·
verified ·
1 Parent(s): 918baa3

Training in progress, step 4200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cee57ad0381bc1b055cba8019e3874d3d51ea7718ee34053bb8a093241cdc0ca
3
  size 1482788592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c395680d89b01821da1bb33984f1c2e9553029f87090f8cd3c027b66de846e
3
  size 1482788592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96ef3fe806149fc64c6859504d4f7f7bd12ece1c70eebd5bc5f528bccee2fb8f
3
  size 2897966842
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99f6515a9eba4ffb6aed2e9196810686af54d3882ecef9ebccbe475775dec4c1
3
  size 2897966842
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc5e70602abcdd87279e45b3ea66eb0df7e6c6689b3fc21feb99b888c0268021
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2aba1e092bb5e9c7cb4142b16fd16e351b46865a4a17fbe78a8e97a303f189e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f678cfbe535c0958633b7acde2a94c53563cd625fa79dcdd0634f74c4367210
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba021163979d3718a3a614ea92e798f442b4a1460b3153e40b61917eeda84568
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2179657220840454,
3
- "best_model_checkpoint": "./output/checkpoint-4050",
4
- "epoch": 0.2676977989292088,
5
  "eval_steps": 150,
6
- "global_step": 4050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3058,6 +3058,119 @@
3058
  "eval_samples_per_second": 9.415,
3059
  "eval_steps_per_second": 9.415,
3060
  "step": 4050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3061
  }
3062
  ],
3063
  "logging_steps": 10,
@@ -3077,7 +3190,7 @@
3077
  "attributes": {}
3078
  }
3079
  },
3080
- "total_flos": 2.8018239306596352e+17,
3081
  "train_batch_size": 4,
3082
  "trial_name": null,
3083
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2168010473251343,
3
+ "best_model_checkpoint": "./output/checkpoint-4200",
4
+ "epoch": 0.2776125322228832,
5
  "eval_steps": 150,
6
+ "global_step": 4200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3058
  "eval_samples_per_second": 9.415,
3059
  "eval_steps_per_second": 9.415,
3060
  "step": 4050
3061
+ },
3062
+ {
3063
+ "epoch": 0.2683587811487871,
3064
+ "grad_norm": 11.658596992492676,
3065
+ "learning_rate": 1.9697137208823396e-06,
3066
+ "loss": 1.172,
3067
+ "step": 4060
3068
+ },
3069
+ {
3070
+ "epoch": 0.2690197633683654,
3071
+ "grad_norm": 5.082404613494873,
3072
+ "learning_rate": 1.9292707397221775e-06,
3073
+ "loss": 1.1331,
3074
+ "step": 4070
3075
+ },
3076
+ {
3077
+ "epoch": 0.2696807455879437,
3078
+ "grad_norm": 13.126559257507324,
3079
+ "learning_rate": 1.8892080337807171e-06,
3080
+ "loss": 1.1899,
3081
+ "step": 4080
3082
+ },
3083
+ {
3084
+ "epoch": 0.27034172780752197,
3085
+ "grad_norm": 11.264731407165527,
3086
+ "learning_rate": 1.8495272498788887e-06,
3087
+ "loss": 1.0929,
3088
+ "step": 4090
3089
+ },
3090
+ {
3091
+ "epoch": 0.27100271002710025,
3092
+ "grad_norm": 12.232498168945312,
3093
+ "learning_rate": 1.8102300191383008e-06,
3094
+ "loss": 1.1517,
3095
+ "step": 4100
3096
+ },
3097
+ {
3098
+ "epoch": 0.2716636922466786,
3099
+ "grad_norm": 6.517210483551025,
3100
+ "learning_rate": 1.7713179569141897e-06,
3101
+ "loss": 1.1451,
3102
+ "step": 4110
3103
+ },
3104
+ {
3105
+ "epoch": 0.27232467446625686,
3106
+ "grad_norm": 10.073516845703125,
3107
+ "learning_rate": 1.7327926627290298e-06,
3108
+ "loss": 1.1757,
3109
+ "step": 4120
3110
+ },
3111
+ {
3112
+ "epoch": 0.27298565668583513,
3113
+ "grad_norm": 10.904183387756348,
3114
+ "learning_rate": 1.6946557202067662e-06,
3115
+ "loss": 1.201,
3116
+ "step": 4130
3117
+ },
3118
+ {
3119
+ "epoch": 0.27364663890541346,
3120
+ "grad_norm": 9.502151489257812,
3121
+ "learning_rate": 1.6569086970077352e-06,
3122
+ "loss": 1.1649,
3123
+ "step": 4140
3124
+ },
3125
+ {
3126
+ "epoch": 0.27430762112499174,
3127
+ "grad_norm": 12.71923542022705,
3128
+ "learning_rate": 1.6195531447642177e-06,
3129
+ "loss": 1.2048,
3130
+ "step": 4150
3131
+ },
3132
+ {
3133
+ "epoch": 0.27496860334457,
3134
+ "grad_norm": 13.27767562866211,
3135
+ "learning_rate": 1.582590599016653e-06,
3136
+ "loss": 1.0894,
3137
+ "step": 4160
3138
+ },
3139
+ {
3140
+ "epoch": 0.27562958556414835,
3141
+ "grad_norm": 12.859643936157227,
3142
+ "learning_rate": 1.5460225791505258e-06,
3143
+ "loss": 1.1565,
3144
+ "step": 4170
3145
+ },
3146
+ {
3147
+ "epoch": 0.2762905677837266,
3148
+ "grad_norm": 6.589792728424072,
3149
+ "learning_rate": 1.509850588333905e-06,
3150
+ "loss": 1.0296,
3151
+ "step": 4180
3152
+ },
3153
+ {
3154
+ "epoch": 0.2769515500033049,
3155
+ "grad_norm": 13.752243995666504,
3156
+ "learning_rate": 1.4740761134556557e-06,
3157
+ "loss": 1.312,
3158
+ "step": 4190
3159
+ },
3160
+ {
3161
+ "epoch": 0.2776125322228832,
3162
+ "grad_norm": 12.691303253173828,
3163
+ "learning_rate": 1.4387006250643236e-06,
3164
+ "loss": 1.1494,
3165
+ "step": 4200
3166
+ },
3167
+ {
3168
+ "epoch": 0.2776125322228832,
3169
+ "eval_loss": 1.2168010473251343,
3170
+ "eval_runtime": 51.4283,
3171
+ "eval_samples_per_second": 9.742,
3172
+ "eval_steps_per_second": 9.742,
3173
+ "step": 4200
3174
  }
3175
  ],
3176
  "logging_steps": 10,
 
3190
  "attributes": {}
3191
  }
3192
  },
3193
+ "total_flos": 2.9003778620227584e+17,
3194
  "train_batch_size": 4,
3195
  "trial_name": null,
3196
  "trial_params": null