FredericFan commited on
Commit
802b425
·
verified ·
1 Parent(s): 30bfcca

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6285fa313f0947673eec5233d58debefd79860b88f1e8817dbce58e3f753178
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c5f184b1d46adf06c909c07ec09ec73f59fa0356e2bb6d27bf1d629195e514e
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dbee5bdfb983c0e941bb4bb09b7db4d65254e5039bb314ca888b9a074cade77
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d751b2339dd99faad4d27209d8ea82d6e6756bf23742c7710a4679bed5de6ad
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7da8bfa9479088a603d5d711484f1c342c030cc789c3b380976c39f9fdee1630
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2abd40401eddc2bd3c6c44fde55af2f1f3f33fa474fa02d7522b22eefad8987
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e8c3d9b9adef7fe2e8d44bfba6c38d304f6dc2baa719635d5abe8206c21dc4c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ad8d98ce7fdace8246d70df783ac4b591617302d583209f66506bee9bb36f7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08497656136751175,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-6500",
4
- "epoch": 0.52,
5
  "eval_steps": 500,
6
- "global_step": 6500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1021,6 +1021,84 @@
1021
  "eval_samples_per_second": 22.813,
1022
  "eval_steps_per_second": 5.703,
1023
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1024
  }
1025
  ],
1026
  "logging_steps": 50,
@@ -1040,7 +1118,7 @@
1040
  "attributes": {}
1041
  }
1042
  },
1043
- "total_flos": 1.583290515456e+16,
1044
  "train_batch_size": 4,
1045
  "trial_name": null,
1046
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08497656136751175,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-6500",
4
+ "epoch": 0.56,
5
  "eval_steps": 500,
6
+ "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1021
  "eval_samples_per_second": 22.813,
1022
  "eval_steps_per_second": 5.703,
1023
  "step": 6500
1024
+ },
1025
+ {
1026
+ "epoch": 0.524,
1027
+ "grad_norm": 0.24468739330768585,
1028
+ "learning_rate": 2.21424e-05,
1029
+ "loss": 0.0688,
1030
+ "step": 6550
1031
+ },
1032
+ {
1033
+ "epoch": 0.528,
1034
+ "grad_norm": 0.17887485027313232,
1035
+ "learning_rate": 2.20824e-05,
1036
+ "loss": 0.0645,
1037
+ "step": 6600
1038
+ },
1039
+ {
1040
+ "epoch": 0.532,
1041
+ "grad_norm": 0.12986980378627777,
1042
+ "learning_rate": 2.2022399999999998e-05,
1043
+ "loss": 0.0609,
1044
+ "step": 6650
1045
+ },
1046
+ {
1047
+ "epoch": 0.536,
1048
+ "grad_norm": 0.25361281633377075,
1049
+ "learning_rate": 2.1962399999999998e-05,
1050
+ "loss": 0.0603,
1051
+ "step": 6700
1052
+ },
1053
+ {
1054
+ "epoch": 0.54,
1055
+ "grad_norm": 0.1815791130065918,
1056
+ "learning_rate": 2.19024e-05,
1057
+ "loss": 0.0659,
1058
+ "step": 6750
1059
+ },
1060
+ {
1061
+ "epoch": 0.544,
1062
+ "grad_norm": 0.12782719731330872,
1063
+ "learning_rate": 2.18424e-05,
1064
+ "loss": 0.0641,
1065
+ "step": 6800
1066
+ },
1067
+ {
1068
+ "epoch": 0.548,
1069
+ "grad_norm": 0.1801528036594391,
1070
+ "learning_rate": 2.17824e-05,
1071
+ "loss": 0.0666,
1072
+ "step": 6850
1073
+ },
1074
+ {
1075
+ "epoch": 0.552,
1076
+ "grad_norm": 0.1247314065694809,
1077
+ "learning_rate": 2.17224e-05,
1078
+ "loss": 0.0592,
1079
+ "step": 6900
1080
+ },
1081
+ {
1082
+ "epoch": 0.556,
1083
+ "grad_norm": 0.19411933422088623,
1084
+ "learning_rate": 2.16624e-05,
1085
+ "loss": 0.0688,
1086
+ "step": 6950
1087
+ },
1088
+ {
1089
+ "epoch": 0.56,
1090
+ "grad_norm": 0.11316727846860886,
1091
+ "learning_rate": 2.1602400000000002e-05,
1092
+ "loss": 0.0635,
1093
+ "step": 7000
1094
+ },
1095
+ {
1096
+ "epoch": 0.56,
1097
+ "eval_loss": 0.08500248938798904,
1098
+ "eval_runtime": 87.7238,
1099
+ "eval_samples_per_second": 22.799,
1100
+ "eval_steps_per_second": 5.7,
1101
+ "step": 7000
1102
  }
1103
  ],
1104
  "logging_steps": 50,
 
1118
  "attributes": {}
1119
  }
1120
  },
1121
+ "total_flos": 1.705082093568e+16,
1122
  "train_batch_size": 4,
1123
  "trial_name": null,
1124
  "trial_params": null