Ksjsjjdj commited on
Commit
6384451
·
verified ·
1 Parent(s): ff01cfb

Auto-save flat update: checkpoint-10000

Browse files
Files changed (4) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +144 -4
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:248fad6196016d5bb8fa97e7ca2cd99fdbc2e5115597a52bd0f7525d48177707
3
  size 17315288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b711109f0ad3f319d1bc6cd06062dae48744a3395a51ba0e6af33cece506c46c
3
  size 17315288
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e812adf6e44217b37bb95ea71945f039f7ed54815c7648506eacc625aec642b
3
  size 34640005
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72fed1de07ba7856e90f74545259c787b689f6231e854dd0b9e5288e17c92f2c
3
  size 34640005
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa4c7a3f0413da0222033791fbd61f87fa01f48b28114defa805cbadbcde3d7c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a9f154b1359afdda4d3ed65870a0e8dc4d46259d77c6ba1858bc2095aee8d67
3
  size 1465
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.99,
5
  "eval_steps": 500,
6
- "global_step": 9900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -13867,6 +13867,146 @@
13867
  "learning_rate": 2.002002002002002e-06,
13868
  "loss": 2.3372,
13869
  "step": 9900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13870
  }
13871
  ],
13872
  "logging_steps": 5,
@@ -13881,12 +14021,12 @@
13881
  "should_evaluate": false,
13882
  "should_log": false,
13883
  "should_save": true,
13884
- "should_training_stop": false
13885
  },
13886
  "attributes": {}
13887
  }
13888
  },
13889
- "total_flos": 8169853236019200.0,
13890
  "train_batch_size": 4,
13891
  "trial_name": null,
13892
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
13867
  "learning_rate": 2.002002002002002e-06,
13868
  "loss": 2.3372,
13869
  "step": 9900
13870
+ },
13871
+ {
13872
+ "epoch": 0.9905,
13873
+ "grad_norm": 0.8105862140655518,
13874
+ "learning_rate": 1.9019019019019022e-06,
13875
+ "loss": 2.3333,
13876
+ "step": 9905
13877
+ },
13878
+ {
13879
+ "epoch": 0.991,
13880
+ "grad_norm": 0.8059213757514954,
13881
+ "learning_rate": 1.801801801801802e-06,
13882
+ "loss": 2.3359,
13883
+ "step": 9910
13884
+ },
13885
+ {
13886
+ "epoch": 0.9915,
13887
+ "grad_norm": 0.818318247795105,
13888
+ "learning_rate": 1.7017017017017019e-06,
13889
+ "loss": 2.3476,
13890
+ "step": 9915
13891
+ },
13892
+ {
13893
+ "epoch": 0.992,
13894
+ "grad_norm": 0.7882871031761169,
13895
+ "learning_rate": 1.6016016016016016e-06,
13896
+ "loss": 2.3351,
13897
+ "step": 9920
13898
+ },
13899
+ {
13900
+ "epoch": 0.9925,
13901
+ "grad_norm": 0.817363440990448,
13902
+ "learning_rate": 1.5015015015015015e-06,
13903
+ "loss": 2.3523,
13904
+ "step": 9925
13905
+ },
13906
+ {
13907
+ "epoch": 0.993,
13908
+ "grad_norm": 0.8170154094696045,
13909
+ "learning_rate": 1.4014014014014014e-06,
13910
+ "loss": 2.3567,
13911
+ "step": 9930
13912
+ },
13913
+ {
13914
+ "epoch": 0.9935,
13915
+ "grad_norm": 0.8051609992980957,
13916
+ "learning_rate": 1.3013013013013014e-06,
13917
+ "loss": 2.3376,
13918
+ "step": 9935
13919
+ },
13920
+ {
13921
+ "epoch": 0.994,
13922
+ "grad_norm": 0.8126572370529175,
13923
+ "learning_rate": 1.2012012012012013e-06,
13924
+ "loss": 2.3451,
13925
+ "step": 9940
13926
+ },
13927
+ {
13928
+ "epoch": 0.9945,
13929
+ "grad_norm": 0.8231433629989624,
13930
+ "learning_rate": 1.1011011011011012e-06,
13931
+ "loss": 2.3174,
13932
+ "step": 9945
13933
+ },
13934
+ {
13935
+ "epoch": 0.995,
13936
+ "grad_norm": 0.8126521110534668,
13937
+ "learning_rate": 1.001001001001001e-06,
13938
+ "loss": 2.3333,
13939
+ "step": 9950
13940
+ },
13941
+ {
13942
+ "epoch": 0.9955,
13943
+ "grad_norm": 0.7962733507156372,
13944
+ "learning_rate": 9.00900900900901e-07,
13945
+ "loss": 2.3245,
13946
+ "step": 9955
13947
+ },
13948
+ {
13949
+ "epoch": 0.996,
13950
+ "grad_norm": 0.8092034459114075,
13951
+ "learning_rate": 8.008008008008008e-07,
13952
+ "loss": 2.3472,
13953
+ "step": 9960
13954
+ },
13955
+ {
13956
+ "epoch": 0.9965,
13957
+ "grad_norm": 0.8146346807479858,
13958
+ "learning_rate": 7.007007007007007e-07,
13959
+ "loss": 2.3337,
13960
+ "step": 9965
13961
+ },
13962
+ {
13963
+ "epoch": 0.997,
13964
+ "grad_norm": 0.7841795086860657,
13965
+ "learning_rate": 6.006006006006006e-07,
13966
+ "loss": 2.3382,
13967
+ "step": 9970
13968
+ },
13969
+ {
13970
+ "epoch": 0.9975,
13971
+ "grad_norm": 0.7981340885162354,
13972
+ "learning_rate": 5.005005005005005e-07,
13973
+ "loss": 2.3166,
13974
+ "step": 9975
13975
+ },
13976
+ {
13977
+ "epoch": 0.998,
13978
+ "grad_norm": 0.8195034861564636,
13979
+ "learning_rate": 4.004004004004004e-07,
13980
+ "loss": 2.325,
13981
+ "step": 9980
13982
+ },
13983
+ {
13984
+ "epoch": 0.9985,
13985
+ "grad_norm": 0.8242753148078918,
13986
+ "learning_rate": 3.003003003003003e-07,
13987
+ "loss": 2.3474,
13988
+ "step": 9985
13989
+ },
13990
+ {
13991
+ "epoch": 0.999,
13992
+ "grad_norm": 0.9042975902557373,
13993
+ "learning_rate": 2.002002002002002e-07,
13994
+ "loss": 2.322,
13995
+ "step": 9990
13996
+ },
13997
+ {
13998
+ "epoch": 0.9995,
13999
+ "grad_norm": 0.8336848020553589,
14000
+ "learning_rate": 1.001001001001001e-07,
14001
+ "loss": 2.3474,
14002
+ "step": 9995
14003
+ },
14004
+ {
14005
+ "epoch": 1.0,
14006
+ "grad_norm": 0.8093213438987732,
14007
+ "learning_rate": 0.0,
14008
+ "loss": 2.3473,
14009
+ "step": 10000
14010
  }
14011
  ],
14012
  "logging_steps": 5,
 
14021
  "should_evaluate": false,
14022
  "should_log": false,
14023
  "should_save": true,
14024
+ "should_training_stop": true
14025
  },
14026
  "attributes": {}
14027
  }
14028
  },
14029
+ "total_flos": 8252377006080000.0,
14030
  "train_batch_size": 4,
14031
  "trial_name": null,
14032
  "trial_params": null