FredericFan commited on
Commit
c0409bd
·
verified ·
1 Parent(s): e490584

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:505ab1c5a34c51302d4bbe4a328e757e4ed0c5b5a72411048c8f13bbf2635e0c
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1998440dc1fd1017b8e8ae1d999fce13a94dffb0b42736c732ef0d40ee60bd0
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7d32e738e2880d3c24e7619fc2f522beb9e9c3515dd4633aed48a29ef433cf5
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd973c109381ca03cc4c7ff8271e54697feff8b75e9d2abe1f7ad064426cfc27
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93c8742c8826e2530a7b338bfc6b583a37586dd446431d091cb023f04fe4b53a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eec7922e0e8f954a67c405890db8015d9bb8a0c99cdf61294b3077009dcff9eb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:588932b38df48361d8c321bcc1bc5968dc8111dbf384cded84b91367b4837f6a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf75de0e462da04981b7e7eaad4e35f0906a2b31e58f69cacf60ddca173fc0ea
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08186879754066467,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
- "epoch": 1.56,
5
  "eval_steps": 500,
6
- "global_step": 19500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3049,6 +3049,84 @@
3049
  "eval_samples_per_second": 22.703,
3050
  "eval_steps_per_second": 5.676,
3051
  "step": 19500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3052
  }
3053
  ],
3054
  "logging_steps": 50,
@@ -3068,7 +3146,7 @@
3068
  "attributes": {}
3069
  }
3070
  },
3071
- "total_flos": 4.749871546368e+16,
3072
  "train_batch_size": 4,
3073
  "trial_name": null,
3074
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08186879754066467,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
+ "epoch": 1.6,
5
  "eval_steps": 500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3049
  "eval_samples_per_second": 22.703,
3050
  "eval_steps_per_second": 5.676,
3051
  "step": 19500
3052
+ },
3053
+ {
3054
+ "epoch": 1.564,
3055
+ "grad_norm": 0.22682276368141174,
3056
+ "learning_rate": 6.5472e-06,
3057
+ "loss": 0.0603,
3058
+ "step": 19550
3059
+ },
3060
+ {
3061
+ "epoch": 1.568,
3062
+ "grad_norm": 0.13181114196777344,
3063
+ "learning_rate": 6.4871999999999995e-06,
3064
+ "loss": 0.0497,
3065
+ "step": 19600
3066
+ },
3067
+ {
3068
+ "epoch": 1.572,
3069
+ "grad_norm": 0.1349440962076187,
3070
+ "learning_rate": 6.427200000000001e-06,
3071
+ "loss": 0.0507,
3072
+ "step": 19650
3073
+ },
3074
+ {
3075
+ "epoch": 1.576,
3076
+ "grad_norm": 0.1361471712589264,
3077
+ "learning_rate": 6.367200000000001e-06,
3078
+ "loss": 0.0501,
3079
+ "step": 19700
3080
+ },
3081
+ {
3082
+ "epoch": 1.58,
3083
+ "grad_norm": 0.25193363428115845,
3084
+ "learning_rate": 6.3072e-06,
3085
+ "loss": 0.0565,
3086
+ "step": 19750
3087
+ },
3088
+ {
3089
+ "epoch": 1.584,
3090
+ "grad_norm": 0.12969471514225006,
3091
+ "learning_rate": 6.2472e-06,
3092
+ "loss": 0.0616,
3093
+ "step": 19800
3094
+ },
3095
+ {
3096
+ "epoch": 1.588,
3097
+ "grad_norm": 0.18332916498184204,
3098
+ "learning_rate": 6.187200000000001e-06,
3099
+ "loss": 0.0513,
3100
+ "step": 19850
3101
+ },
3102
+ {
3103
+ "epoch": 1.592,
3104
+ "grad_norm": 0.19354714453220367,
3105
+ "learning_rate": 6.1272e-06,
3106
+ "loss": 0.0612,
3107
+ "step": 19900
3108
+ },
3109
+ {
3110
+ "epoch": 1.596,
3111
+ "grad_norm": 0.17420926690101624,
3112
+ "learning_rate": 6.0672e-06,
3113
+ "loss": 0.0604,
3114
+ "step": 19950
3115
+ },
3116
+ {
3117
+ "epoch": 1.6,
3118
+ "grad_norm": 0.09988817572593689,
3119
+ "learning_rate": 6.0072e-06,
3120
+ "loss": 0.0523,
3121
+ "step": 20000
3122
+ },
3123
+ {
3124
+ "epoch": 1.6,
3125
+ "eval_loss": 0.08189179003238678,
3126
+ "eval_runtime": 88.0418,
3127
+ "eval_samples_per_second": 22.716,
3128
+ "eval_steps_per_second": 5.679,
3129
+ "step": 20000
3130
  }
3131
  ],
3132
  "logging_steps": 50,
 
3146
  "attributes": {}
3147
  }
3148
  },
3149
+ "total_flos": 4.87166312448e+16,
3150
  "train_batch_size": 4,
3151
  "trial_name": null,
3152
  "trial_params": null