rippertnt commited on
Commit
3b74aa2
·
1 Parent(s): 5960742

Upload 14 files

Browse files
latest CHANGED
@@ -1 +1 @@
1
- global_step2000
 
1
+ global_step8000
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e66fff2a6bbfe0a0077fb9179b3d1441781be24e39a82a6c8106c3cdea18d6ec
3
- size 123569645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d990b51ce673b6fd2abc89e19c8de3396176bd4b7629d4afe284e263eeb25b50
3
+ size 23657822141
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e75c96f06b249e57a701db73ce821398e69672027a86d3a44063830602a29ab4
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3c5cb412e12159a59afe5657ce4b5e0a06e7fb420bedbb5228fe1245702762
3
  size 14583
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dae7f45b6bac644ac207a61f43cba6d4b919a4cac22022bbb02907914422f5d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741230672078323886b763e522c728741456a587860909fc529ce815a7aca5ec
3
  size 14583
rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e53c770fe48635faad7fa341007d771781f1397cd47daab5b58f879ffb65f178
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea587886b41579993bb5d20c79047b968ae2d71d22ba4c739b07ce31d7486a6
3
  size 14583
rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68692af1001e65d02e07ac9974ccf4c332cfb23bc8f89566e1a908b1f2c4a1ed
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab727740f74dd67e60283d27b4339609a1dda888b067cc06520e2f1d7dc17db
3
  size 14583
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9992992291520673,
5
- "global_step": 8025,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -48006,170 +48006,11 @@
48006
  "learning_rate": 2e-05,
48007
  "loss": 0.4429,
48008
  "step": 8000
48009
- },
48010
- {
48011
- "epoch": 2.99,
48012
- "learning_rate": 2e-05,
48013
- "loss": 0.5603,
48014
- "step": 8001
48015
- },
48016
- {
48017
- "epoch": 2.99,
48018
- "learning_rate": 2e-05,
48019
- "loss": 0.5119,
48020
- "step": 8002
48021
- },
48022
- {
48023
- "epoch": 2.99,
48024
- "learning_rate": 2e-05,
48025
- "loss": 0.3574,
48026
- "step": 8003
48027
- },
48028
- {
48029
- "epoch": 2.99,
48030
- "learning_rate": 2e-05,
48031
- "loss": 0.4055,
48032
- "step": 8004
48033
- },
48034
- {
48035
- "epoch": 2.99,
48036
- "learning_rate": 2e-05,
48037
- "loss": 0.6877,
48038
- "step": 8005
48039
- },
48040
- {
48041
- "epoch": 2.99,
48042
- "learning_rate": 2e-05,
48043
- "loss": 0.3634,
48044
- "step": 8006
48045
- },
48046
- {
48047
- "epoch": 2.99,
48048
- "learning_rate": 2e-05,
48049
- "loss": 0.4054,
48050
- "step": 8007
48051
- },
48052
- {
48053
- "epoch": 2.99,
48054
- "learning_rate": 2e-05,
48055
- "loss": 0.3723,
48056
- "step": 8008
48057
- },
48058
- {
48059
- "epoch": 2.99,
48060
- "learning_rate": 2e-05,
48061
- "loss": 0.4081,
48062
- "step": 8009
48063
- },
48064
- {
48065
- "epoch": 2.99,
48066
- "learning_rate": 2e-05,
48067
- "loss": 0.4419,
48068
- "step": 8010
48069
- },
48070
- {
48071
- "epoch": 2.99,
48072
- "learning_rate": 2e-05,
48073
- "loss": 0.6377,
48074
- "step": 8011
48075
- },
48076
- {
48077
- "epoch": 2.99,
48078
- "learning_rate": 2e-05,
48079
- "loss": 0.5082,
48080
- "step": 8012
48081
- },
48082
- {
48083
- "epoch": 2.99,
48084
- "learning_rate": 2e-05,
48085
- "loss": 0.5274,
48086
- "step": 8013
48087
- },
48088
- {
48089
- "epoch": 3.0,
48090
- "learning_rate": 2e-05,
48091
- "loss": 0.4954,
48092
- "step": 8014
48093
- },
48094
- {
48095
- "epoch": 3.0,
48096
- "learning_rate": 2e-05,
48097
- "loss": 0.617,
48098
- "step": 8015
48099
- },
48100
- {
48101
- "epoch": 3.0,
48102
- "learning_rate": 2e-05,
48103
- "loss": 0.4943,
48104
- "step": 8016
48105
- },
48106
- {
48107
- "epoch": 3.0,
48108
- "learning_rate": 2e-05,
48109
- "loss": 0.3116,
48110
- "step": 8017
48111
- },
48112
- {
48113
- "epoch": 3.0,
48114
- "learning_rate": 2e-05,
48115
- "loss": 0.4602,
48116
- "step": 8018
48117
- },
48118
- {
48119
- "epoch": 3.0,
48120
- "learning_rate": 2e-05,
48121
- "loss": 0.4009,
48122
- "step": 8019
48123
- },
48124
- {
48125
- "epoch": 3.0,
48126
- "learning_rate": 2e-05,
48127
- "loss": 0.4631,
48128
- "step": 8020
48129
- },
48130
- {
48131
- "epoch": 3.0,
48132
- "learning_rate": 2e-05,
48133
- "loss": 0.3465,
48134
- "step": 8021
48135
- },
48136
- {
48137
- "epoch": 3.0,
48138
- "learning_rate": 2e-05,
48139
- "loss": 0.6339,
48140
- "step": 8022
48141
- },
48142
- {
48143
- "epoch": 3.0,
48144
- "learning_rate": 2e-05,
48145
- "loss": 0.3831,
48146
- "step": 8023
48147
- },
48148
- {
48149
- "epoch": 3.0,
48150
- "learning_rate": 2e-05,
48151
- "loss": 0.4769,
48152
- "step": 8024
48153
- },
48154
- {
48155
- "epoch": 3.0,
48156
- "learning_rate": 2e-05,
48157
- "loss": 0.5142,
48158
- "step": 8025
48159
- },
48160
- {
48161
- "epoch": 3.0,
48162
- "step": 8025,
48163
- "total_flos": 1027959387144192.0,
48164
- "train_loss": 1.0478906600497593,
48165
- "train_runtime": 551322.7584,
48166
- "train_samples_per_second": 0.466,
48167
- "train_steps_per_second": 0.015
48168
  }
48169
  ],
48170
  "max_steps": 8025,
48171
  "num_train_epochs": 3,
48172
- "total_flos": 1027959387144192.0,
48173
  "trial_name": null,
48174
  "trial_params": null
48175
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.989955617846298,
5
+ "global_step": 8000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
48006
  "learning_rate": 2e-05,
48007
  "loss": 0.4429,
48008
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48009
  }
48010
  ],
48011
  "max_steps": 8025,
48012
  "num_train_epochs": 3,
48013
+ "total_flos": 1024787635126272.0,
48014
  "trial_name": null,
48015
  "trial_params": null
48016
  }